@@ -1490,63 +1490,63 @@
;; DEST eew is smaller than SOURCE eew.
(define_insn "@pred_indexed_<order>load<mode>_x2_smaller_eew"
- [(set (match_operand:VEEWTRUNC2 0 "register_operand" "=&vr, &vr")
+ [(set (match_operand:VEEWTRUNC2 0 "register_operand" "=vd, vd, vr, vr, &vr, &vr")
(if_then_else:VEEWTRUNC2
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 5 "vector_length_operand" " rK, rK")
- (match_operand 6 "const_int_operand" " i, i")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1,vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK")
+ (match_operand 6 "const_int_operand" " i, i, i, i, i, i")
+ (match_operand 7 "const_int_operand" " i, i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(unspec:VEEWTRUNC2
- [(match_operand 3 "pmode_register_operand" " r, r")
+ [(match_operand 3 "pmode_register_operand" " r, r, r, r, r, r")
(mem:BLK (scratch))
- (match_operand:<VINDEX_DOUBLE_EXT> 4 "register_operand" " vr, vr")] ORDER)
- (match_operand:VEEWTRUNC2 2 "vector_merge_operand" " vu, 0")))]
+ (match_operand:<VINDEX_DOUBLE_EXT> 4 "register_operand" " 0, 0, 0, 0, vr, vr")] ORDER)
+ (match_operand:VEEWTRUNC2 2 "vector_merge_operand" " vu, 0, vu, 0, vu, 0")))]
"TARGET_VECTOR"
"vl<order>xei<double_ext_sew>.v\t%0,(%3),%4%p1"
[(set_attr "type" "vld<order>x")
(set_attr "mode" "<MODE>")])
(define_insn "@pred_indexed_<order>load<mode>_x4_smaller_eew"
- [(set (match_operand:VEEWTRUNC4 0 "register_operand" "=&vr, &vr")
+ [(set (match_operand:VEEWTRUNC4 0 "register_operand" "=vd, vd, vr, vr, &vr, &vr")
(if_then_else:VEEWTRUNC4
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 5 "vector_length_operand" " rK, rK")
- (match_operand 6 "const_int_operand" " i, i")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1,vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK")
+ (match_operand 6 "const_int_operand" " i, i, i, i, i, i")
+ (match_operand 7 "const_int_operand" " i, i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(unspec:VEEWTRUNC4
- [(match_operand 3 "pmode_register_operand" " r, r")
+ [(match_operand 3 "pmode_register_operand" " r, r, r, r, r, r")
(mem:BLK (scratch))
- (match_operand:<VINDEX_QUAD_EXT> 4 "register_operand" " vr, vr")] ORDER)
- (match_operand:VEEWTRUNC4 2 "vector_merge_operand" " vu, 0")))]
+ (match_operand:<VINDEX_QUAD_EXT> 4 "register_operand" " 0, 0, 0, 0, vr, vr")] ORDER)
+ (match_operand:VEEWTRUNC4 2 "vector_merge_operand" " vu, 0, vu, 0, vu, 0")))]
"TARGET_VECTOR"
"vl<order>xei<quad_ext_sew>.v\t%0,(%3),%4%p1"
[(set_attr "type" "vld<order>x")
(set_attr "mode" "<MODE>")])
(define_insn "@pred_indexed_<order>load<mode>_x8_smaller_eew"
- [(set (match_operand:VEEWTRUNC8 0 "register_operand" "=&vr, &vr")
+ [(set (match_operand:VEEWTRUNC8 0 "register_operand" "=vd, vd, vr, vr, &vr, &vr")
(if_then_else:VEEWTRUNC8
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 5 "vector_length_operand" " rK, rK")
- (match_operand 6 "const_int_operand" " i, i")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1,vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK")
+ (match_operand 6 "const_int_operand" " i, i, i, i, i, i")
+ (match_operand 7 "const_int_operand" " i, i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(unspec:VEEWTRUNC8
- [(match_operand 3 "pmode_register_operand" " r, r")
+ [(match_operand 3 "pmode_register_operand" " r, r, r, r, r, r")
(mem:BLK (scratch))
- (match_operand:<VINDEX_OCT_EXT> 4 "register_operand" " vr, vr")] ORDER)
- (match_operand:VEEWTRUNC8 2 "vector_merge_operand" " vu, 0")))]
+ (match_operand:<VINDEX_OCT_EXT> 4 "register_operand" " 0, 0, 0, 0, vr, vr")] ORDER)
+ (match_operand:VEEWTRUNC8 2 "vector_merge_operand" " vu, 0, vu, 0, vu, 0")))]
"TARGET_VECTOR"
"vl<order>xei<oct_ext_sew>.v\t%0,(%3),%4%p1"
[(set_attr "type" "vld<order>x")
@@ -2420,15 +2420,15 @@
(set (attr "avl_type") (symbol_ref "INTVAL (operands[7])"))])
(define_insn "@pred_madc<mode>"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr, &vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vr, &vr, &vr")
(unspec:<VM>
[(plus:VI
- (match_operand:VI 1 "register_operand" " vr, vr")
- (match_operand:VI 2 "vector_arith_operand" " vr, vi"))
- (match_operand:<VM> 3 "register_operand" " vm, vm")
+ (match_operand:VI 1 "register_operand" " %0, vr, vr")
+ (match_operand:VI 2 "vector_arith_operand" "vrvi, vr, vi"))
+ (match_operand:<VM> 3 "register_operand" " vm, vm, vm")
(unspec:<VM>
- [(match_operand 4 "vector_length_operand" " rK, rK")
- (match_operand 5 "const_int_operand" " i, i")
+ [(match_operand 4 "vector_length_operand" " rK, rK, rK")
+ (match_operand 5 "const_int_operand" " i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)] UNSPEC_VMADC))]
"TARGET_VECTOR"
@@ -2439,15 +2439,15 @@
(set (attr "avl_type") (symbol_ref "INTVAL (operands[5])"))])
(define_insn "@pred_msbc<mode>"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vr, vr, &vr")
(unspec:<VM>
[(minus:VI
- (match_operand:VI 1 "register_operand" " vr")
- (match_operand:VI 2 "register_operand" " vr"))
- (match_operand:<VM> 3 "register_operand" " vm")
+ (match_operand:VI 1 "register_operand" " 0, vr, vr")
+ (match_operand:VI 2 "register_operand" " vr, 0, vr"))
+ (match_operand:<VM> 3 "register_operand" " vm, vm, vm")
(unspec:<VM>
- [(match_operand 4 "vector_length_operand" " rK")
- (match_operand 5 "const_int_operand" " i")
+ [(match_operand 4 "vector_length_operand" " rK, rK, rK")
+ (match_operand 5 "const_int_operand" " i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)] UNSPEC_VMSBC))]
"TARGET_VECTOR"
@@ -2458,16 +2458,16 @@
(set (attr "avl_type") (symbol_ref "INTVAL (operands[5])"))])
(define_insn "@pred_madc<mode>_scalar"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vr, &vr")
(unspec:<VM>
[(plus:VI_QHS
(vec_duplicate:VI_QHS
- (match_operand:<VEL> 2 "register_operand" " r"))
- (match_operand:VI_QHS 1 "register_operand" " vr"))
- (match_operand:<VM> 3 "register_operand" " vm")
+ (match_operand:<VEL> 2 "register_operand" " r, r"))
+ (match_operand:VI_QHS 1 "register_operand" " 0, vr"))
+ (match_operand:<VM> 3 "register_operand" " vm, vm")
(unspec:<VM>
- [(match_operand 4 "vector_length_operand" " rK")
- (match_operand 5 "const_int_operand" " i")
+ [(match_operand 4 "vector_length_operand" " rK, rK")
+ (match_operand 5 "const_int_operand" " i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)] UNSPEC_VMADC))]
"TARGET_VECTOR"
@@ -2478,16 +2478,16 @@
(set (attr "avl_type") (symbol_ref "INTVAL (operands[5])"))])
(define_insn "@pred_msbc<mode>_scalar"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vr, &vr")
(unspec:<VM>
[(minus:VI_QHS
(vec_duplicate:VI_QHS
- (match_operand:<VEL> 2 "reg_or_0_operand" " rJ"))
- (match_operand:VI_QHS 1 "register_operand" " vr"))
- (match_operand:<VM> 3 "register_operand" " vm")
+ (match_operand:<VEL> 2 "reg_or_0_operand" " rJ, rJ"))
+ (match_operand:VI_QHS 1 "register_operand" " 0, vr"))
+ (match_operand:<VM> 3 "register_operand" " vm, vm")
(unspec:<VM>
- [(match_operand 4 "vector_length_operand" " rK")
- (match_operand 5 "const_int_operand" " i")
+ [(match_operand 4 "vector_length_operand" " rK, rK")
+ (match_operand 5 "const_int_operand" " i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)] UNSPEC_VMSBC))]
"TARGET_VECTOR"
@@ -2527,16 +2527,16 @@
})
(define_insn "*pred_madc<mode>_scalar"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vr, &vr")
(unspec:<VM>
[(plus:VI_D
(vec_duplicate:VI_D
- (match_operand:<VEL> 2 "reg_or_0_operand" " rJ"))
- (match_operand:VI_D 1 "register_operand" " vr"))
- (match_operand:<VM> 3 "register_operand" " vm")
+ (match_operand:<VEL> 2 "reg_or_0_operand" " rJ, rJ"))
+ (match_operand:VI_D 1 "register_operand" " 0, vr"))
+ (match_operand:<VM> 3 "register_operand" " vm, vm")
(unspec:<VM>
- [(match_operand 4 "vector_length_operand" " rK")
- (match_operand 5 "const_int_operand" " i")
+ [(match_operand 4 "vector_length_operand" " rK, rK")
+ (match_operand 5 "const_int_operand" " i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)] UNSPEC_VMADC))]
"TARGET_VECTOR"
@@ -2547,17 +2547,17 @@
(set (attr "avl_type") (symbol_ref "INTVAL (operands[5])"))])
(define_insn "*pred_madc<mode>_extended_scalar"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vr, &vr")
(unspec:<VM>
[(plus:VI_D
(vec_duplicate:VI_D
(sign_extend:<VEL>
- (match_operand:<VSUBEL> 2 "reg_or_0_operand" " rJ")))
- (match_operand:VI_D 1 "register_operand" " vr"))
- (match_operand:<VM> 3 "register_operand" " vm")
+ (match_operand:<VSUBEL> 2 "reg_or_0_operand" " rJ, rJ")))
+ (match_operand:VI_D 1 "register_operand" " 0, vr"))
+ (match_operand:<VM> 3 "register_operand" " vm, vm")
(unspec:<VM>
- [(match_operand 4 "vector_length_operand" " rK")
- (match_operand 5 "const_int_operand" " i")
+ [(match_operand 4 "vector_length_operand" " rK, rK")
+ (match_operand 5 "const_int_operand" " i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)] UNSPEC_VMADC))]
"TARGET_VECTOR"
@@ -2597,16 +2597,16 @@
})
(define_insn "*pred_msbc<mode>_scalar"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vr, &vr")
(unspec:<VM>
[(minus:VI_D
(vec_duplicate:VI_D
- (match_operand:<VEL> 2 "reg_or_0_operand" " rJ"))
- (match_operand:VI_D 1 "register_operand" " vr"))
- (match_operand:<VM> 3 "register_operand" " vm")
+ (match_operand:<VEL> 2 "reg_or_0_operand" " rJ, rJ"))
+ (match_operand:VI_D 1 "register_operand" " 0, vr"))
+ (match_operand:<VM> 3 "register_operand" " vm, vm")
(unspec:<VM>
- [(match_operand 4 "vector_length_operand" " rK")
- (match_operand 5 "const_int_operand" " i")
+ [(match_operand 4 "vector_length_operand" " rK, rK")
+ (match_operand 5 "const_int_operand" " i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)] UNSPEC_VMSBC))]
"TARGET_VECTOR"
@@ -2617,17 +2617,17 @@
(set (attr "avl_type") (symbol_ref "INTVAL (operands[5])"))])
(define_insn "*pred_msbc<mode>_extended_scalar"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vr, &vr")
(unspec:<VM>
[(minus:VI_D
(vec_duplicate:VI_D
(sign_extend:<VEL>
- (match_operand:<VSUBEL> 2 "reg_or_0_operand" " rJ")))
- (match_operand:VI_D 1 "register_operand" " vr"))
- (match_operand:<VM> 3 "register_operand" " vm")
+ (match_operand:<VSUBEL> 2 "reg_or_0_operand" " rJ, rJ")))
+ (match_operand:VI_D 1 "register_operand" " 0, vr"))
+ (match_operand:<VM> 3 "register_operand" " vm, vm")
(unspec:<VM>
- [(match_operand 4 "vector_length_operand" " rK")
- (match_operand 5 "const_int_operand" " i")
+ [(match_operand 4 "vector_length_operand" " rK, rK")
+ (match_operand 5 "const_int_operand" " i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)] UNSPEC_VMSBC))]
"TARGET_VECTOR"
@@ -2638,14 +2638,14 @@
(set (attr "avl_type") (symbol_ref "INTVAL (operands[5])"))])
(define_insn "@pred_madc<mode>_overflow"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr, &vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vr, &vr, &vr")
(unspec:<VM>
[(plus:VI
- (match_operand:VI 1 "register_operand" " vr, vr")
- (match_operand:VI 2 "vector_arith_operand" " vr, vi"))
+ (match_operand:VI 1 "register_operand" " %0, vr, vr")
+ (match_operand:VI 2 "vector_arith_operand" "vrvi, vr, vi"))
(unspec:<VM>
- [(match_operand 3 "vector_length_operand" " rK, rK")
- (match_operand 4 "const_int_operand" " i, i")
+ [(match_operand 3 "vector_length_operand" " rK, rK, rK")
+ (match_operand 4 "const_int_operand" " i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)] UNSPEC_OVERFLOW))]
"TARGET_VECTOR"
@@ -2656,14 +2656,14 @@
(set (attr "avl_type") (symbol_ref "INTVAL (operands[4])"))])
(define_insn "@pred_msbc<mode>_overflow"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vr, vr, &vr, &vr")
(unspec:<VM>
[(minus:VI
- (match_operand:VI 1 "register_operand" " vr")
- (match_operand:VI 2 "register_operand" " vr"))
+ (match_operand:VI 1 "register_operand" " 0, vr, vr, vr")
+ (match_operand:VI 2 "register_operand" " vr, 0, vr, vi"))
(unspec:<VM>
- [(match_operand 3 "vector_length_operand" " rK")
- (match_operand 4 "const_int_operand" " i")
+ [(match_operand 3 "vector_length_operand" " rK, rK, rK, rK")
+ (match_operand 4 "const_int_operand" " i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)] UNSPEC_OVERFLOW))]
"TARGET_VECTOR"
@@ -2674,15 +2674,15 @@
(set (attr "avl_type") (symbol_ref "INTVAL (operands[4])"))])
(define_insn "@pred_madc<mode>_overflow_scalar"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vr, &vr")
(unspec:<VM>
[(plus:VI_QHS
(vec_duplicate:VI_QHS
- (match_operand:<VEL> 2 "reg_or_0_operand" " rJ"))
- (match_operand:VI_QHS 1 "register_operand" " vr"))
+ (match_operand:<VEL> 2 "reg_or_0_operand" " rJ, rJ"))
+ (match_operand:VI_QHS 1 "register_operand" " 0, vr"))
(unspec:<VM>
- [(match_operand 3 "vector_length_operand" " rK")
- (match_operand 4 "const_int_operand" " i")
+ [(match_operand 3 "vector_length_operand" " rK, rK")
+ (match_operand 4 "const_int_operand" " i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)] UNSPEC_OVERFLOW))]
"TARGET_VECTOR"
@@ -2693,15 +2693,15 @@
(set (attr "avl_type") (symbol_ref "INTVAL (operands[4])"))])
(define_insn "@pred_msbc<mode>_overflow_scalar"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vr, &vr")
(unspec:<VM>
[(minus:VI_QHS
(vec_duplicate:VI_QHS
- (match_operand:<VEL> 2 "reg_or_0_operand" " rJ"))
- (match_operand:VI_QHS 1 "register_operand" " vr"))
+ (match_operand:<VEL> 2 "reg_or_0_operand" " rJ, rJ"))
+ (match_operand:VI_QHS 1 "register_operand" " 0, vr"))
(unspec:<VM>
- [(match_operand 3 "vector_length_operand" " rK")
- (match_operand 4 "const_int_operand" " i")
+ [(match_operand 3 "vector_length_operand" " rK, rK")
+ (match_operand 4 "const_int_operand" " i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)] UNSPEC_OVERFLOW))]
"TARGET_VECTOR"
@@ -2740,15 +2740,15 @@
})
(define_insn "*pred_madc<mode>_overflow_scalar"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vr, &vr")
(unspec:<VM>
[(plus:VI_D
(vec_duplicate:VI_D
- (match_operand:<VEL> 2 "reg_or_0_operand" " rJ"))
- (match_operand:VI_D 1 "register_operand" " vr"))
+ (match_operand:<VEL> 2 "reg_or_0_operand" " rJ, rJ"))
+ (match_operand:VI_D 1 "register_operand" " 0, vr"))
(unspec:<VM>
- [(match_operand 3 "vector_length_operand" " rK")
- (match_operand 4 "const_int_operand" " i")
+ [(match_operand 3 "vector_length_operand" " rK, rK")
+ (match_operand 4 "const_int_operand" " i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)] UNSPEC_OVERFLOW))]
"TARGET_VECTOR"
@@ -2759,16 +2759,16 @@
(set (attr "avl_type") (symbol_ref "INTVAL (operands[4])"))])
(define_insn "*pred_madc<mode>_overflow_extended_scalar"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vr, &vr")
(unspec:<VM>
[(plus:VI_D
(vec_duplicate:VI_D
(sign_extend:<VEL>
- (match_operand:<VSUBEL> 2 "reg_or_0_operand" " rJ")))
- (match_operand:VI_D 1 "register_operand" " vr"))
+ (match_operand:<VSUBEL> 2 "reg_or_0_operand" " rJ, rJ")))
+ (match_operand:VI_D 1 "register_operand" " 0, vr"))
(unspec:<VM>
- [(match_operand 3 "vector_length_operand" " rK")
- (match_operand 4 "const_int_operand" " i")
+ [(match_operand 3 "vector_length_operand" " rK, rK")
+ (match_operand 4 "const_int_operand" " i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)] UNSPEC_OVERFLOW))]
"TARGET_VECTOR"
@@ -2807,15 +2807,15 @@
})
(define_insn "*pred_msbc<mode>_overflow_scalar"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vr, &vr")
(unspec:<VM>
[(minus:VI_D
(vec_duplicate:VI_D
- (match_operand:<VEL> 2 "reg_or_0_operand" " rJ"))
- (match_operand:VI_D 1 "register_operand" " vr"))
+ (match_operand:<VEL> 2 "reg_or_0_operand" " rJ, rJ"))
+ (match_operand:VI_D 1 "register_operand" " 0, vr"))
(unspec:<VM>
- [(match_operand 3 "vector_length_operand" " rK")
- (match_operand 4 "const_int_operand" " i")
+ [(match_operand 3 "vector_length_operand" " rK, rK")
+ (match_operand 4 "const_int_operand" " i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)] UNSPEC_OVERFLOW))]
"TARGET_VECTOR"
@@ -2826,16 +2826,16 @@
(set (attr "avl_type") (symbol_ref "INTVAL (operands[4])"))])
(define_insn "*pred_msbc<mode>_overflow_extended_scalar"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vr, &vr")
(unspec:<VM>
[(minus:VI_D
(vec_duplicate:VI_D
(sign_extend:<VEL>
- (match_operand:<VSUBEL> 2 "reg_or_0_operand" " rJ")))
- (match_operand:VI_D 1 "register_operand" " vr"))
+ (match_operand:<VSUBEL> 2 "reg_or_0_operand" " rJ, rJ")))
+ (match_operand:VI_D 1 "register_operand" " 0, vr"))
(unspec:<VM>
- [(match_operand 3 "vector_length_operand" " rK")
- (match_operand 4 "const_int_operand" " i")
+ [(match_operand 3 "vector_length_operand" " rK, rK")
+ (match_operand 4 "const_int_operand" " i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)] UNSPEC_OVERFLOW))]
"TARGET_VECTOR"
@@ -3617,6 +3617,29 @@
"TARGET_VECTOR"
{})
+(define_insn "*pred_cmp<mode>_merge_tie_mask"
+ [(set (match_operand:<VM> 0 "register_operand" "=vm")
+ (if_then_else:<VM>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "register_operand" " 0")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operator:<VM> 2 "comparison_except_ltge_operator"
+ [(match_operand:VI 3 "register_operand" " vr")
+ (match_operand:VI 4 "vector_arith_operand" "vrvi")])
+ (match_dup 1)))]
+ "TARGET_VECTOR"
+ "vms%B2.v%o4\t%0,%3,%v4,v0.t"
+ [(set_attr "type" "vicmp")
+ (set_attr "mode" "<MODE>")
+ (set_attr "merge_op_idx" "1")
+ (set_attr "vl_op_idx" "5")
+ (set (attr "ma") (symbol_ref "riscv_vector::get_ma(operands[6])"))
+ (set (attr "avl_type") (symbol_ref "INTVAL (operands[7])"))])
+
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_cmp<mode>"
[(set (match_operand:<VM> 0 "register_operand" "=vr, vr, vr, vr")
@@ -3639,19 +3662,19 @@
;; We use early-clobber for source LMUL > dest LMUL.
(define_insn "*pred_cmp<mode>_narrow"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr, &vr, &vr, &vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vm, vr, vr, vr, vr, vr, vr, &vr, &vr")
(if_then_else:<VM>
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1,vmWc1,vmWc1")
- (match_operand 6 "vector_length_operand" " rK, rK, rK, rK")
- (match_operand 7 "const_int_operand" " i, i, i, i")
- (match_operand 8 "const_int_operand" " i, i, i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " 0,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1")
+ (match_operand 6 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK, rK")
+ (match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i, i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "comparison_except_ltge_operator"
- [(match_operand:VI 4 "register_operand" " vr, vr, vr, vr")
- (match_operand:VI 5 "vector_arith_operand" " vr, vr, vi, vi")])
- (match_operand:<VM> 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ [(match_operand:VI 4 "register_operand" " vr, 0, vr, 0, 0, vr, 0, vr, vr")
+ (match_operand:VI 5 "vector_arith_operand" " vrvi, vrvi, 0, 0, vrvi, 0, 0, vrvi, vrvi")])
+ (match_operand:<VM> 2 "vector_merge_operand" " vu, vu, vu, vu, 0, 0, 0, vu, 0")))]
"TARGET_VECTOR && known_gt (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
"vms%B3.v%o5\t%0,%4,%v5%p1"
[(set_attr "type" "vicmp")
@@ -3674,6 +3697,29 @@
"TARGET_VECTOR"
{})
+(define_insn "*pred_ltge<mode>_merge_tie_mask"
+ [(set (match_operand:<VM> 0 "register_operand" "=vm")
+ (if_then_else:<VM>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "register_operand" " 0")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operator:<VM> 2 "ltge_operator"
+ [(match_operand:VI 3 "register_operand" " vr")
+ (match_operand:VI 4 "vector_neg_arith_operand" "vrvj")])
+ (match_dup 1)))]
+ "TARGET_VECTOR"
+ "vms%B2.v%o4\t%0,%3,%v4,v0.t"
+ [(set_attr "type" "vicmp")
+ (set_attr "mode" "<MODE>")
+ (set_attr "merge_op_idx" "1")
+ (set_attr "vl_op_idx" "5")
+ (set (attr "ma") (symbol_ref "riscv_vector::get_ma(operands[6])"))
+ (set (attr "avl_type") (symbol_ref "INTVAL (operands[7])"))])
+
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_ltge<mode>"
[(set (match_operand:<VM> 0 "register_operand" "=vr, vr, vr, vr")
@@ -3696,19 +3742,19 @@
;; We use early-clobber for source LMUL > dest LMUL.
(define_insn "*pred_ltge<mode>_narrow"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr, &vr, &vr, &vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vm, vr, vr, vr, vr, vr, vr, &vr, &vr")
(if_then_else:<VM>
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1,vmWc1,vmWc1")
- (match_operand 6 "vector_length_operand" " rK, rK, rK, rK")
- (match_operand 7 "const_int_operand" " i, i, i, i")
- (match_operand 8 "const_int_operand" " i, i, i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " 0,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1")
+ (match_operand 6 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK, rK")
+ (match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i, i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "ltge_operator"
- [(match_operand:VI 4 "register_operand" " vr, vr, vr, vr")
- (match_operand:VI 5 "vector_neg_arith_operand" " vr, vr, vj, vj")])
- (match_operand:<VM> 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ [(match_operand:VI 4 "register_operand" " vr, 0, vr, 0, 0, vr, 0, vr, vr")
+ (match_operand:VI 5 "vector_neg_arith_operand" " vrvj, vrvj, 0, 0, vrvj, 0, 0, vrvj, vrvj")])
+ (match_operand:<VM> 2 "vector_merge_operand" " vu, vu, vu, vu, 0, 0, 0, vu, 0")))]
"TARGET_VECTOR && known_gt (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
"vms%B3.v%o5\t%0,%4,%v5%p1"
[(set_attr "type" "vicmp")
@@ -3732,6 +3778,30 @@
"TARGET_VECTOR"
{})
+(define_insn "*pred_cmp<mode>_scalar_merge_tie_mask"
+ [(set (match_operand:<VM> 0 "register_operand" "=vm")
+ (if_then_else:<VM>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "register_operand" " 0")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operator:<VM> 2 "comparison_except_eqge_operator"
+ [(match_operand:VI_QHS 3 "register_operand" " vr")
+ (vec_duplicate:VI_QHS
+ (match_operand:<VEL> 4 "register_operand" " r"))])
+ (match_dup 1)))]
+ "TARGET_VECTOR"
+ "vms%B2.vx\t%0,%3,%4,v0.t"
+ [(set_attr "type" "vicmp")
+ (set_attr "mode" "<MODE>")
+ (set_attr "merge_op_idx" "1")
+ (set_attr "vl_op_idx" "5")
+ (set (attr "ma") (symbol_ref "riscv_vector::get_ma(operands[6])"))
+ (set (attr "avl_type") (symbol_ref "INTVAL (operands[7])"))])
+
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_cmp<mode>_scalar"
[(set (match_operand:<VM> 0 "register_operand" "=vr, vr")
@@ -3755,20 +3825,20 @@
;; We use early-clobber for source LMUL > dest LMUL.
(define_insn "*pred_cmp<mode>_scalar_narrow"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr, &vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vm, vr, vr, &vr, &vr")
(if_then_else:<VM>
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 6 "vector_length_operand" " rK, rK")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " 0,vmWc1,vmWc1,vmWc1,vmWc1")
+ (match_operand 6 "vector_length_operand" " rK, rK, rK, rK, rK")
+ (match_operand 7 "const_int_operand" " i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "comparison_except_eqge_operator"
- [(match_operand:VI_QHS 4 "register_operand" " vr, vr")
+ [(match_operand:VI_QHS 4 "register_operand" " vr, 0, 0, vr, vr")
(vec_duplicate:VI_QHS
- (match_operand:<VEL> 5 "register_operand" " r, r"))])
- (match_operand:<VM> 2 "vector_merge_operand" " vu, 0")))]
+ (match_operand:<VEL> 5 "register_operand" " r, r, r, r, r"))])
+ (match_operand:<VM> 2 "vector_merge_operand" " vu, vu, 0, vu, 0")))]
"TARGET_VECTOR && known_gt (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
"vms%B3.vx\t%0,%4,%5%p1"
[(set_attr "type" "vicmp")
@@ -3792,6 +3862,30 @@
"TARGET_VECTOR"
{})
+(define_insn "*pred_eqne<mode>_scalar_merge_tie_mask"
+ [(set (match_operand:<VM> 0 "register_operand" "=vm")
+ (if_then_else:<VM>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "register_operand" " 0")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operator:<VM> 2 "equality_operator"
+ [(vec_duplicate:VI_QHS
+ (match_operand:<VEL> 4 "register_operand" " r"))
+ (match_operand:VI_QHS 3 "register_operand" " vr")])
+ (match_dup 1)))]
+ "TARGET_VECTOR"
+ "vms%B2.vx\t%0,%3,%4,v0.t"
+ [(set_attr "type" "vicmp")
+ (set_attr "mode" "<MODE>")
+ (set_attr "merge_op_idx" "1")
+ (set_attr "vl_op_idx" "5")
+ (set (attr "ma") (symbol_ref "riscv_vector::get_ma(operands[6])"))
+ (set (attr "avl_type") (symbol_ref "INTVAL (operands[7])"))])
+
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_eqne<mode>_scalar"
[(set (match_operand:<VM> 0 "register_operand" "=vr, vr")
@@ -3815,20 +3909,20 @@
;; We use early-clobber for source LMUL > dest LMUL.
(define_insn "*pred_eqne<mode>_scalar_narrow"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr, &vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vm, vr, vr, &vr, &vr")
(if_then_else:<VM>
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 6 "vector_length_operand" " rK, rK")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " 0,vmWc1,vmWc1,vmWc1,vmWc1")
+ (match_operand 6 "vector_length_operand" " rK, rK, rK, rK, rK")
+ (match_operand 7 "const_int_operand" " i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "equality_operator"
[(vec_duplicate:VI_QHS
- (match_operand:<VEL> 5 "register_operand" " r, r"))
- (match_operand:VI_QHS 4 "register_operand" " vr, vr")])
- (match_operand:<VM> 2 "vector_merge_operand" " vu, 0")))]
+ (match_operand:<VEL> 5 "register_operand" " r, r, r, r, r"))
+ (match_operand:VI_QHS 4 "register_operand" " vr, 0, 0, vr, vr")])
+ (match_operand:<VM> 2 "vector_merge_operand" " vu, vu, 0, vu, 0")))]
"TARGET_VECTOR && known_gt (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
"vms%B3.vx\t%0,%4,%5%p1"
[(set_attr "type" "vicmp")
@@ -3909,6 +4003,54 @@
DONE;
})
+(define_insn "*pred_cmp<mode>_scalar_merge_tie_mask"
+ [(set (match_operand:<VM> 0 "register_operand" "=vm")
+ (if_then_else:<VM>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "register_operand" " 0")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operator:<VM> 2 "comparison_except_eqge_operator"
+ [(match_operand:VI_D 3 "register_operand" " vr")
+ (vec_duplicate:VI_D
+ (match_operand:<VEL> 4 "register_operand" " r"))])
+ (match_dup 1)))]
+ "TARGET_VECTOR"
+ "vms%B2.vx\t%0,%3,%4,v0.t"
+ [(set_attr "type" "vicmp")
+ (set_attr "mode" "<MODE>")
+ (set_attr "merge_op_idx" "1")
+ (set_attr "vl_op_idx" "5")
+ (set (attr "ma") (symbol_ref "riscv_vector::get_ma(operands[6])"))
+ (set (attr "avl_type") (symbol_ref "INTVAL (operands[7])"))])
+
+(define_insn "*pred_eqne<mode>_scalar_merge_tie_mask"
+ [(set (match_operand:<VM> 0 "register_operand" "=vm")
+ (if_then_else:<VM>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "register_operand" " 0")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operator:<VM> 2 "equality_operator"
+ [(vec_duplicate:VI_D
+ (match_operand:<VEL> 4 "register_operand" " r"))
+ (match_operand:VI_D 3 "register_operand" " vr")])
+ (match_dup 1)))]
+ "TARGET_VECTOR"
+ "vms%B2.vx\t%0,%3,%4,v0.t"
+ [(set_attr "type" "vicmp")
+ (set_attr "mode" "<MODE>")
+ (set_attr "merge_op_idx" "1")
+ (set_attr "vl_op_idx" "5")
+ (set (attr "ma") (symbol_ref "riscv_vector::get_ma(operands[6])"))
+ (set (attr "avl_type") (symbol_ref "INTVAL (operands[7])"))])
+
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_cmp<mode>_scalar"
[(set (match_operand:<VM> 0 "register_operand" "=vr, vr")
@@ -3932,20 +4074,20 @@
;; We use early-clobber for source LMUL > dest LMUL.
(define_insn "*pred_cmp<mode>_scalar_narrow"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr, &vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vm, vr, vr, &vr, &vr")
(if_then_else:<VM>
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 6 "vector_length_operand" " rK, rK")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " 0,vmWc1,vmWc1,vmWc1,vmWc1")
+ (match_operand 6 "vector_length_operand" " rK, rK, rK, rK, rK")
+ (match_operand 7 "const_int_operand" " i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "comparison_except_eqge_operator"
- [(match_operand:VI_D 4 "register_operand" " vr, vr")
+ [(match_operand:VI_D 4 "register_operand" " vr, 0, 0, vr, vr")
(vec_duplicate:VI_D
- (match_operand:<VEL> 5 "register_operand" " r, r"))])
- (match_operand:<VM> 2 "vector_merge_operand" " vu, 0")))]
+ (match_operand:<VEL> 5 "register_operand" " r, r, r, r, r"))])
+ (match_operand:<VM> 2 "vector_merge_operand" " vu, vu, 0, vu, 0")))]
"TARGET_VECTOR && known_gt (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
"vms%B3.vx\t%0,%4,%5%p1"
[(set_attr "type" "vicmp")
@@ -3974,25 +4116,50 @@
;; We use early-clobber for source LMUL > dest LMUL.
(define_insn "*pred_eqne<mode>_scalar_narrow"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr, &vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vm, vr, vr, &vr, &vr")
(if_then_else:<VM>
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 6 "vector_length_operand" " rK, rK")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " 0,vmWc1,vmWc1,vmWc1,vmWc1")
+ (match_operand 6 "vector_length_operand" " rK, rK, rK, rK, rK")
+ (match_operand 7 "const_int_operand" " i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "equality_operator"
[(vec_duplicate:VI_D
- (match_operand:<VEL> 5 "register_operand" " r, r"))
- (match_operand:VI_D 4 "register_operand" " vr, vr")])
- (match_operand:<VM> 2 "vector_merge_operand" " vu, 0")))]
+ (match_operand:<VEL> 5 "register_operand" " r, r, r, r, r"))
+ (match_operand:VI_D 4 "register_operand" " vr, 0, 0, vr, vr")])
+ (match_operand:<VM> 2 "vector_merge_operand" " vu, vu, 0, vu, 0")))]
"TARGET_VECTOR && known_gt (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
"vms%B3.vx\t%0,%4,%5%p1"
[(set_attr "type" "vicmp")
(set_attr "mode" "<MODE>")])
+(define_insn "*pred_cmp<mode>_extended_scalar_merge_tie_mask"
+ [(set (match_operand:<VM> 0 "register_operand" "=vm")
+ (if_then_else:<VM>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "register_operand" " 0")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operator:<VM> 2 "comparison_except_eqge_operator"
+ [(match_operand:VI_D 3 "register_operand" " vr")
+ (vec_duplicate:VI_D
+ (sign_extend:<VEL>
+ (match_operand:<VSUBEL> 4 "register_operand" " r")))])
+ (match_dup 1)))]
+ "TARGET_VECTOR"
+ "vms%B2.vx\t%0,%3,%4,v0.t"
+ [(set_attr "type" "vicmp")
+ (set_attr "mode" "<MODE>")
+ (set_attr "merge_op_idx" "1")
+ (set_attr "vl_op_idx" "5")
+ (set (attr "ma") (symbol_ref "riscv_vector::get_ma(operands[6])"))
+ (set (attr "avl_type") (symbol_ref "INTVAL (operands[7])"))])
+
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_cmp<mode>_extended_scalar"
[(set (match_operand:<VM> 0 "register_operand" "=vr, vr")
@@ -4016,26 +4183,51 @@
(set_attr "mode" "<MODE>")])
(define_insn "*pred_cmp<mode>_extended_scalar_narrow"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr, &vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vm, vr, vr, &vr, &vr")
(if_then_else:<VM>
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 6 "vector_length_operand" " rK, rK")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " 0,vmWc1,vmWc1,vmWc1,vmWc1")
+ (match_operand 6 "vector_length_operand" " rK, rK, rK, rK, rK")
+ (match_operand 7 "const_int_operand" " i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "comparison_except_eqge_operator"
- [(match_operand:VI_D 4 "register_operand" " vr, vr")
+ [(match_operand:VI_D 4 "register_operand" " vr, 0, 0, vr, vr")
(vec_duplicate:VI_D
(sign_extend:<VEL>
- (match_operand:<VSUBEL> 5 "register_operand" " r, r")))])
- (match_operand:<VM> 2 "vector_merge_operand" " vu, 0")))]
+ (match_operand:<VSUBEL> 5 "register_operand" " r, r, r, r, r")))])
+ (match_operand:<VM> 2 "vector_merge_operand" " vu, vu, 0, vu, 0")))]
"TARGET_VECTOR && known_gt (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
"vms%B3.vx\t%0,%4,%5%p1"
[(set_attr "type" "vicmp")
(set_attr "mode" "<MODE>")])
+(define_insn "*pred_eqne<mode>_extended_scalar_merge_tie_mask"
+ [(set (match_operand:<VM> 0 "register_operand" "=vm")
+ (if_then_else:<VM>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "register_operand" " 0")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operator:<VM> 2 "equality_operator"
+ [(vec_duplicate:VI_D
+ (sign_extend:<VEL>
+ (match_operand:<VSUBEL> 4 "register_operand" " r")))
+ (match_operand:VI_D 3 "register_operand" " vr")])
+ (match_dup 1)))]
+ "TARGET_VECTOR"
+ "vms%B2.vx\t%0,%3,%4,v0.t"
+ [(set_attr "type" "vicmp")
+ (set_attr "mode" "<MODE>")
+ (set_attr "merge_op_idx" "1")
+ (set_attr "vl_op_idx" "5")
+ (set (attr "ma") (symbol_ref "riscv_vector::get_ma(operands[6])"))
+ (set (attr "avl_type") (symbol_ref "INTVAL (operands[7])"))])
+
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_eqne<mode>_extended_scalar"
[(set (match_operand:<VM> 0 "register_operand" "=vr, vr")
@@ -4059,21 +4251,21 @@
(set_attr "mode" "<MODE>")])
(define_insn "*pred_eqne<mode>_extended_scalar_narrow"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr, &vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vm, vr, vr, &vr, &vr")
(if_then_else:<VM>
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 6 "vector_length_operand" " rK, rK")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " 0,vmWc1,vmWc1,vmWc1,vmWc1")
+ (match_operand 6 "vector_length_operand" " rK, rK, rK, rK, rK")
+ (match_operand 7 "const_int_operand" " i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "equality_operator"
[(vec_duplicate:VI_D
(sign_extend:<VEL>
- (match_operand:<VSUBEL> 5 "register_operand" " r, r")))
- (match_operand:VI_D 4 "register_operand" " vr, vr")])
- (match_operand:<VM> 2 "vector_merge_operand" " vu, 0")))]
+ (match_operand:<VSUBEL> 5 "register_operand" " r, r, r, r, r")))
+ (match_operand:VI_D 4 "register_operand" " vr, 0, 0, vr, vr")])
+ (match_operand:<VM> 2 "vector_merge_operand" " vu, vu, 0, vu, 0")))]
"TARGET_VECTOR && known_gt (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
"vms%B3.vx\t%0,%4,%5%p1"
[(set_attr "type" "vicmp")
@@ -4111,6 +4303,7 @@
{
enum rtx_code code = GET_CODE (operands[3]);
rtx undef = RVV_VUNDEF (<VM>mode);
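+  /* Build the intermediate compare result in a fresh temporary rather than
+     operands[0], so that writing it cannot clobber an input (e.g. the mask
+     in operands[1]) that the follow-up mask instruction still reads.  */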
+ rtx tmp = gen_reg_rtx (<VM>mode);
if (code == GEU && rtx_equal_p (operands[5], const0_rtx))
{
/* If vmsgeu with 0 immediate, expand it to vmset. */
@@ -4157,12 +4350,11 @@
- pseudoinstruction: vmsge{u}.vx vd, va, x
- expansion: vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd. */
emit_insn (
- gen_pred_cmp<mode>_scalar (operands[0], operands[1], operands[2],
+ gen_pred_cmp<mode>_scalar (tmp, operands[1], operands[2],
operands[3], operands[4], operands[5],
operands[6], operands[7], operands[8]));
emit_insn (gen_pred_nand<vm> (operands[0], CONSTM1_RTX (<VM>mode),
- undef, operands[0], operands[0],
- operands[6], operands[8]));
+ undef, tmp, tmp, operands[6], operands[8]));
}
else
{
@@ -4171,13 +4363,12 @@
/* masked va >= x, vd == v0
- pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t, vt
- expansion: vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt. */
- rtx reg = gen_reg_rtx (<VM>mode);
emit_insn (gen_pred_cmp<mode>_scalar (
- reg, CONSTM1_RTX (<VM>mode), undef, operands[3], operands[4],
+ tmp, CONSTM1_RTX (<VM>mode), undef, operands[3], operands[4],
operands[5], operands[6], operands[7], operands[8]));
emit_insn (
gen_pred_andnot<vm> (operands[0], CONSTM1_RTX (<VM>mode), undef,
- operands[1], reg, operands[6], operands[8]));
+ operands[1], tmp, operands[6], operands[8]));
}
else
{
@@ -4186,10 +4377,10 @@
- expansion: vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0.
*/
emit_insn (gen_pred_cmp<mode>_scalar (
- operands[0], operands[1], operands[2], operands[3], operands[4],
+ tmp, operands[1], operands[2], operands[3], operands[4],
operands[5], operands[6], operands[7], operands[8]));
emit_insn (gen_pred (XOR, <VM>mode, operands[0],
- CONSTM1_RTX (<VM>mode), undef, operands[0],
+ CONSTM1_RTX (<VM>mode), undef, tmp,
operands[1], operands[6], operands[8]));
}
}
@@ -6296,21 +6487,44 @@
[(set_attr "type" "vfcmp")
(set_attr "mode" "<MODE>")])
+(define_insn "*pred_cmp<mode>_narrow_merge_tie_mask"
+ [(set (match_operand:<VM> 0 "register_operand" "=vm")
+ (if_then_else:<VM>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "register_operand" " 0")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operator:<VM> 2 "signed_order_operator"
+ [(match_operand:VF 3 "register_operand" " vr")
+ (match_operand:VF 4 "register_operand" " vr")])
+ (match_dup 1)))]
+ "TARGET_VECTOR"
+ "vmf%B2.vv\t%0,%3,%4,v0.t"
+ [(set_attr "type" "vfcmp")
+ (set_attr "mode" "<MODE>")
+ (set_attr "merge_op_idx" "1")
+ (set_attr "vl_op_idx" "5")
+ (set (attr "ma") (symbol_ref "riscv_vector::get_ma(operands[6])"))
+ (set (attr "avl_type") (symbol_ref "INTVAL (operands[7])"))])
+
;; We use early-clobber for source LMUL > dest LMUL.
(define_insn "*pred_cmp<mode>_narrow"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr, &vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vm, vr, vr, vr, vr, vr, vr, &vr, &vr")
(if_then_else:<VM>
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 6 "vector_length_operand" " rK, rK")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " 0,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1")
+ (match_operand 6 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK, rK")
+ (match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i, i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "signed_order_operator"
- [(match_operand:VF 4 "register_operand" " vr, vr")
- (match_operand:VF 5 "register_operand" " vr, vr")])
- (match_operand:<VM> 2 "vector_merge_operand" " vu, 0")))]
+ [(match_operand:VF 4 "register_operand" " vr, 0, vr, 0, 0, vr, 0, vr, vr")
+ (match_operand:VF 5 "register_operand" " vr, vr, 0, 0, vr, 0, 0, vr, vr")])
+ (match_operand:<VM> 2 "vector_merge_operand" " vu, vu, vu, vu, 0, 0, 0, vu, 0")))]
"TARGET_VECTOR && known_gt (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
"vmf%B3.vv\t%0,%4,%5%p1"
[(set_attr "type" "vfcmp")
@@ -6334,6 +6548,30 @@
"TARGET_VECTOR"
{})
+(define_insn "*pred_cmp<mode>_scalar_merge_tie_mask"
+ [(set (match_operand:<VM> 0 "register_operand" "=vm")
+ (if_then_else:<VM>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "register_operand" " 0")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operator:<VM> 2 "signed_order_operator"
+ [(match_operand:VF 3 "register_operand" " vr")
+ (vec_duplicate:VF
+ (match_operand:<VEL> 4 "register_operand" " f"))])
+ (match_dup 1)))]
+ "TARGET_VECTOR"
+ "vmf%B2.vf\t%0,%3,%4,v0.t"
+ [(set_attr "type" "vfcmp")
+ (set_attr "mode" "<MODE>")
+ (set_attr "merge_op_idx" "1")
+ (set_attr "vl_op_idx" "5")
+ (set (attr "ma") (symbol_ref "riscv_vector::get_ma(operands[6])"))
+ (set (attr "avl_type") (symbol_ref "INTVAL (operands[7])"))])
+
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_cmp<mode>_scalar"
[(set (match_operand:<VM> 0 "register_operand" "=vr, vr")
@@ -6357,20 +6595,20 @@
;; We use early-clobber for source LMUL > dest LMUL.
(define_insn "*pred_cmp<mode>_scalar_narrow"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr, &vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vm, vr, vr, &vr, &vr")
(if_then_else:<VM>
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 6 "vector_length_operand" " rK, rK")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " 0,vmWc1,vmWc1,vmWc1,vmWc1")
+ (match_operand 6 "vector_length_operand" " rK, rK, rK, rK, rK")
+ (match_operand 7 "const_int_operand" " i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "signed_order_operator"
- [(match_operand:VF 4 "register_operand" " vr, vr")
+ [(match_operand:VF 4 "register_operand" " vr, 0, 0, vr, vr")
(vec_duplicate:VF
- (match_operand:<VEL> 5 "register_operand" " f, f"))])
- (match_operand:<VM> 2 "vector_merge_operand" " vu, 0")))]
+ (match_operand:<VEL> 5 "register_operand" " f, f, f, f, f"))])
+ (match_operand:<VM> 2 "vector_merge_operand" " vu, vu, 0, vu, 0")))]
"TARGET_VECTOR && known_gt (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
"vmf%B3.vf\t%0,%4,%5%p1"
[(set_attr "type" "vfcmp")
@@ -6394,6 +6632,30 @@
"TARGET_VECTOR"
{})
+(define_insn "*pred_eqne<mode>_scalar_merge_tie_mask"
+ [(set (match_operand:<VM> 0 "register_operand" "=vm")
+ (if_then_else:<VM>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "register_operand" " 0")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operator:<VM> 2 "equality_operator"
+ [(vec_duplicate:VF
+ (match_operand:<VEL> 4 "register_operand" " f"))
+ (match_operand:VF 3 "register_operand" " vr")])
+ (match_dup 1)))]
+ "TARGET_VECTOR"
+ "vmf%B2.vf\t%0,%3,%4,v0.t"
+ [(set_attr "type" "vfcmp")
+ (set_attr "mode" "<MODE>")
+ (set_attr "merge_op_idx" "1")
+ (set_attr "vl_op_idx" "5")
+ (set (attr "ma") (symbol_ref "riscv_vector::get_ma(operands[6])"))
+ (set (attr "avl_type") (symbol_ref "INTVAL (operands[7])"))])
+
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_eqne<mode>_scalar"
[(set (match_operand:<VM> 0 "register_operand" "=vr, vr")
@@ -6417,20 +6679,20 @@
;; We use early-clobber for source LMUL > dest LMUL.
(define_insn "*pred_eqne<mode>_scalar_narrow"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr, &vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vm, vr, vr, &vr, &vr")
(if_then_else:<VM>
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 6 "vector_length_operand" " rK, rK")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " 0,vmWc1,vmWc1,vmWc1,vmWc1")
+ (match_operand 6 "vector_length_operand" " rK, rK, rK, rK, rK")
+ (match_operand 7 "const_int_operand" " i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "equality_operator"
[(vec_duplicate:VF
- (match_operand:<VEL> 5 "register_operand" " f, f"))
- (match_operand:VF 4 "register_operand" " vr, vr")])
- (match_operand:<VM> 2 "vector_merge_operand" " vu, 0")))]
+ (match_operand:<VEL> 5 "register_operand" " f, f, f, f, f"))
+ (match_operand:VF 4 "register_operand" " vr, 0, 0, vr, vr")])
+ (match_operand:<VM> 2 "vector_merge_operand" " vu, vu, 0, vu, 0")))]
"TARGET_VECTOR && known_gt (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
"vmf%B3.vf\t%0,%4,%5%p1"
[(set_attr "type" "vfcmp")
@@ -6730,44 +6992,44 @@
;; For example, the LMUL = 1 mode corresponding to VNx16QImode is VNx4QImode
;; for -march=rv*zve32* whereas it is VNx8QImode for -march=rv*zve64*.
(define_insn "@pred_reduc_<reduc><mode><vlmul1>"
- [(set (match_operand:<VLMUL1> 0 "register_operand" "=vd, vd, vr, vr")
+ [(set (match_operand:<VLMUL1> 0 "register_operand" "=vr, vr")
(unspec:<VLMUL1>
[(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
- (match_operand 5 "vector_length_operand" " rK, rK, rK, rK")
- (match_operand 6 "const_int_operand" " i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i, i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(any_reduc:VI
(vec_duplicate:VI
(vec_select:<VEL>
- (match_operand:<VLMUL1> 4 "register_operand" " vr, vr, vr, vr")
+ (match_operand:<VLMUL1> 4 "register_operand" " vr, vr")
(parallel [(const_int 0)])))
- (match_operand:VI 3 "register_operand" " vr, vr, vr, vr"))
- (match_operand:<VLMUL1> 2 "vector_merge_operand" " vu, 0, vu, 0")] UNSPEC_REDUC))]
+ (match_operand:VI 3 "register_operand" " vr, vr"))
+ (match_operand:<VLMUL1> 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC))]
"TARGET_VECTOR && TARGET_MIN_VLEN > 32"
"vred<reduc>.vs\t%0,%3,%4%p1"
[(set_attr "type" "vired")
(set_attr "mode" "<MODE>")])
(define_insn "@pred_reduc_<reduc><mode><vlmul1_zve32>"
- [(set (match_operand:<VLMUL1_ZVE32> 0 "register_operand" "=vd, vd, vr, vr")
+ [(set (match_operand:<VLMUL1_ZVE32> 0 "register_operand" "=vr, vr")
(unspec:<VLMUL1_ZVE32>
[(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
- (match_operand 5 "vector_length_operand" " rK, rK, rK, rK")
- (match_operand 6 "const_int_operand" " i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i, i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(any_reduc:VI_ZVE32
(vec_duplicate:VI_ZVE32
(vec_select:<VEL>
- (match_operand:<VLMUL1_ZVE32> 4 "register_operand" " vr, vr, vr, vr")
+ (match_operand:<VLMUL1_ZVE32> 4 "register_operand" " vr, vr")
(parallel [(const_int 0)])))
- (match_operand:VI_ZVE32 3 "register_operand" " vr, vr, vr, vr"))
- (match_operand:<VLMUL1_ZVE32> 2 "vector_merge_operand" " vu, 0, vu, 0")] UNSPEC_REDUC))]
+ (match_operand:VI_ZVE32 3 "register_operand" " vr, vr"))
+ (match_operand:<VLMUL1_ZVE32> 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC))]
"TARGET_VECTOR && TARGET_MIN_VLEN == 32"
"vred<reduc>.vs\t%0,%3,%4%p1"
[(set_attr "type" "vired")
@@ -6810,90 +7072,90 @@
(set_attr "mode" "<MODE>")])
(define_insn "@pred_reduc_<reduc><mode><vlmul1>"
- [(set (match_operand:<VLMUL1> 0 "register_operand" "=vd, vd, vr, vr")
+ [(set (match_operand:<VLMUL1> 0 "register_operand" "=vr, vr")
(unspec:<VLMUL1>
[(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
- (match_operand 5 "vector_length_operand" " rK, rK, rK, rK")
- (match_operand 6 "const_int_operand" " i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i, i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(any_freduc:VF
(vec_duplicate:VF
(vec_select:<VEL>
- (match_operand:<VLMUL1> 4 "register_operand" " vr, vr, vr, vr")
+ (match_operand:<VLMUL1> 4 "register_operand" " vr, vr")
(parallel [(const_int 0)])))
- (match_operand:VF 3 "register_operand" " vr, vr, vr, vr"))
- (match_operand:<VLMUL1> 2 "vector_merge_operand" " vu, 0, vu, 0")] UNSPEC_REDUC))]
+ (match_operand:VF 3 "register_operand" " vr, vr"))
+ (match_operand:<VLMUL1> 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC))]
"TARGET_VECTOR && TARGET_MIN_VLEN > 32"
"vfred<reduc>.vs\t%0,%3,%4%p1"
[(set_attr "type" "vfredu")
(set_attr "mode" "<MODE>")])
(define_insn "@pred_reduc_<reduc><mode><vlmul1_zve32>"
- [(set (match_operand:<VLMUL1_ZVE32> 0 "register_operand" "=vd, vd, vr, vr")
+ [(set (match_operand:<VLMUL1_ZVE32> 0 "register_operand" "=vr, vr")
(unspec:<VLMUL1_ZVE32>
[(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
- (match_operand 5 "vector_length_operand" " rK, rK, rK, rK")
- (match_operand 6 "const_int_operand" " i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i, i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(any_freduc:VF_ZVE32
(vec_duplicate:VF_ZVE32
(vec_select:<VEL>
- (match_operand:<VLMUL1_ZVE32> 4 "register_operand" " vr, vr, vr, vr")
+ (match_operand:<VLMUL1_ZVE32> 4 "register_operand" " vr, vr")
(parallel [(const_int 0)])))
- (match_operand:VF_ZVE32 3 "register_operand" " vr, vr, vr, vr"))
- (match_operand:<VLMUL1_ZVE32> 2 "vector_merge_operand" " vu, 0, vu, 0")] UNSPEC_REDUC))]
+ (match_operand:VF_ZVE32 3 "register_operand" " vr, vr"))
+ (match_operand:<VLMUL1_ZVE32> 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC))]
"TARGET_VECTOR && TARGET_MIN_VLEN == 32"
"vfred<reduc>.vs\t%0,%3,%4%p1"
[(set_attr "type" "vfredu")
(set_attr "mode" "<MODE>")])
(define_insn "@pred_reduc_plus<order><mode><vlmul1>"
- [(set (match_operand:<VLMUL1> 0 "register_operand" "=vd, vd, vr, vr")
+ [(set (match_operand:<VLMUL1> 0 "register_operand" "=vr, vr")
(unspec:<VLMUL1>
[(unspec:<VLMUL1>
[(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
- (match_operand 5 "vector_length_operand" " rK, rK, rK, rK")
- (match_operand 6 "const_int_operand" " i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i, i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(plus:VF
(vec_duplicate:VF
(vec_select:<VEL>
- (match_operand:<VLMUL1> 4 "register_operand" " vr, vr, vr, vr")
+ (match_operand:<VLMUL1> 4 "register_operand" " vr, vr")
(parallel [(const_int 0)])))
- (match_operand:VF 3 "register_operand" " vr, vr, vr, vr"))
- (match_operand:<VLMUL1> 2 "vector_merge_operand" " vu, 0, vu, 0")] UNSPEC_REDUC)] ORDER))]
+ (match_operand:VF 3 "register_operand" " vr, vr"))
+ (match_operand:<VLMUL1> 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC)] ORDER))]
"TARGET_VECTOR && TARGET_MIN_VLEN > 32"
"vfred<order>sum.vs\t%0,%3,%4%p1"
[(set_attr "type" "vfred<order>")
(set_attr "mode" "<MODE>")])
(define_insn "@pred_reduc_plus<order><mode><vlmul1_zve32>"
- [(set (match_operand:<VLMUL1_ZVE32> 0 "register_operand" "=vd, vd, vr, vr")
+ [(set (match_operand:<VLMUL1_ZVE32> 0 "register_operand" "=vr, vr")
(unspec:<VLMUL1_ZVE32>
[(unspec:<VLMUL1_ZVE32>
[(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
- (match_operand 5 "vector_length_operand" " rK, rK, rK, rK")
- (match_operand 6 "const_int_operand" " i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i, i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(plus:VF_ZVE32
(vec_duplicate:VF_ZVE32
(vec_select:<VEL>
- (match_operand:<VLMUL1_ZVE32> 4 "register_operand" " vr, vr, vr, vr")
+ (match_operand:<VLMUL1_ZVE32> 4 "register_operand" " vr, vr")
(parallel [(const_int 0)])))
- (match_operand:VF_ZVE32 3 "register_operand" " vr, vr, vr, vr"))
- (match_operand:<VLMUL1_ZVE32> 2 "vector_merge_operand" " vu, 0, vu, 0")] UNSPEC_REDUC)] ORDER))]
+ (match_operand:VF_ZVE32 3 "register_operand" " vr, vr"))
+ (match_operand:<VLMUL1_ZVE32> 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC)] ORDER))]
"TARGET_VECTOR && TARGET_MIN_VLEN == 32"
"vfred<order>sum.vs\t%0,%3,%4%p1"
[(set_attr "type" "vfred<order>")
@@ -24,4 +24,4 @@ void f2 (void * in, void *out, int32_t x)
__riscv_vsm_v_b32 (out, m4, 4);
}
-/* { dg-final { scan-assembler-times {vmv} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
@@ -18,4 +18,4 @@ void f1 (void * in, void *out, int32_t x)
/* { dg-final { scan-assembler-times {vmslt\.vx\s+v[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t} 1 } } */
/* { dg-final { scan-assembler-times {vmxor\.mm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 1 } } */
/* { dg-final { scan-assembler-times {vmnot\.m\s+v[0-9]+,\s*v[0-9]+} 1 } } */
-/* { dg-final { scan-assembler-times {vmv} 1 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,303 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (void *base,void *out,size_t vl)
+{
+ vuint64m1_t bindex = __riscv_vle64_v_u64m1 (base, vl);
+ vint8mf8_t v = __riscv_vluxei64_v_i8mf8(base,bindex,vl);
+ __riscv_vse8_v_i8mf8 (out,v,vl);
+}
+
+void f1 (void *base,void *out,size_t vl)
+{
+ vuint64m1_t bindex = __riscv_vle64_v_u64m1 (base, vl);
+ vint8mf8_t bindex2 = __riscv_vle8_v_i8mf8 ((void *)(base + 100), vl);
+ vint8mf8_t v = __riscv_vluxei64_v_i8mf8_tu(bindex2,base,bindex,vl);
+ __riscv_vse8_v_i8mf8 (out,v,vl);
+}
+
+void f2 (void *base,void *out,size_t vl)
+{
+ vuint64m1_t bindex = __riscv_vle64_v_u64m1 (base, vl);
+ vint8mf8_t v = __riscv_vluxei64_v_i8mf8(base,bindex,vl);
+ vuint64m1_t v2 = __riscv_vadd_vv_u64m1 (bindex, bindex,vl);
+ __riscv_vse8_v_i8mf8 (out,v,vl);
+ __riscv_vse64_v_u64m1 ((void *)out,v2,vl);
+}
+
+void f3 (void *base,void *out,size_t vl, int n)
+{
+ for (int i = 0; i < n; i++){
+ vuint64m1_t bindex = __riscv_vle64_v_u64m1 (base + 100*i, vl);
+ vint8mf8_t v = __riscv_vluxei64_v_i8mf8(base,bindex,vl);
+ vuint64m1_t v2 = __riscv_vadd_vv_u64m1 (bindex, bindex,vl);
+ __riscv_vse8_v_i8mf8 (out + 100*i,v,vl);
+ __riscv_vse64_v_u64m1 ((void *)(out + 200*i),v2,vl);
+ }
+}
+
+void f4 (void *base,void *out,size_t vl)
+{
+ vuint64m1_t bindex = __riscv_vle64_v_u64m1 (base, vl);
+ vint8mf8_t v = __riscv_vluxei64_v_i8mf8(base,bindex,vl);
+ v = __riscv_vluxei64_v_i8mf8_tu(v,base,bindex,vl);
+ v = __riscv_vluxei64_v_i8mf8_tu(v,base,bindex,vl);
+ vuint64m1_t v2 = __riscv_vadd_vv_u64m1 (bindex, bindex,vl);
+ __riscv_vse8_v_i8mf8 (out,v,vl);
+ __riscv_vse64_v_u64m1 ((void *)out,v2,vl);
+}
+
+void f5 (void *base,void *base2,void *out,size_t vl, int n)
+{
+ vuint64m1_t bindex = __riscv_vle64_v_u64m1 (base + 100, vl);
+ for (int i = 0; i < n; i++){
+ vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
+ vint8mf8_t v = __riscv_vluxei64_v_i8mf8_m(m,base,bindex,vl);
+ v = __riscv_vluxei64_v_i8mf8_tu(v,base,bindex,vl);
+ v = __riscv_vle8_v_i8mf8_tu (v, base2, vl);
+ __riscv_vse8_v_i8mf8 (out + 100*i,v,vl);
+ }
+}
+
+void f6 (void *base,void *out,size_t vl)
+{
+ vuint64m8_t bindex = __riscv_vle64_v_u64m8 (base, vl);
+ vint8m1_t v = __riscv_vluxei64_v_i8m1(base,bindex,vl);
+ __riscv_vse8_v_i8m1 (out,v,vl);
+}
+
+void f7 (void *base,void *out,size_t vl)
+{
+ vuint64m8_t bindex = __riscv_vle64_v_u64m8 (base, vl);
+ vint8m1_t src = __riscv_vle8_v_i8m1 ((void *)(base + 100), vl);
+ vint8m1_t v = __riscv_vluxei64_v_i8m1_tu(src,base,bindex,vl);
+ __riscv_vse8_v_i8m1 (out,v,vl);
+}
+
+void f8 (void *base,void *out,size_t vl)
+{
+ vuint64m8_t bindex = __riscv_vle64_v_u64m8 (base, vl);
+ vint8m1_t v = __riscv_vluxei64_v_i8m1(base,bindex,vl);
+ vuint64m8_t v2 = __riscv_vadd_vv_u64m8 (bindex, bindex,vl);
+ __riscv_vse8_v_i8m1 (out,v,vl);
+ __riscv_vse64_v_u64m8 ((void *)out,v2,vl);
+}
+
+void f9 (void *base,void *out,size_t vl, int n)
+{
+ for (int i = 0; i < n; i++){
+ vuint64m8_t bindex = __riscv_vle64_v_u64m8 (base + 100*i, vl);
+ vint8m1_t v = __riscv_vluxei64_v_i8m1(base,bindex,vl);
+ vuint64m8_t v2 = __riscv_vadd_vv_u64m8 (bindex, bindex,vl);
+ __riscv_vse8_v_i8m1 (out + 100*i,v,vl);
+ __riscv_vse64_v_u64m8 ((void *)(out + 200*i),v2,vl);
+ }
+}
+
+void f10 (void *base,void *out,size_t vl)
+{
+ vuint64m8_t bindex = __riscv_vle64_v_u64m8 (base, vl);
+ vint8m1_t v = __riscv_vluxei64_v_i8m1(base,bindex,vl);
+ v = __riscv_vluxei64_v_i8m1_tu(v,base,bindex,vl);
+ v = __riscv_vluxei64_v_i8m1_tu(v,base,bindex,vl);
+ vuint64m8_t v2 = __riscv_vadd_vv_u64m8 (bindex, bindex,vl);
+ __riscv_vse8_v_i8m1 (out,v,vl);
+ __riscv_vse64_v_u64m8 ((void *)out,v2,vl);
+}
+
+void f11 (void *base,void *base2,void *out,size_t vl, int n)
+{
+ vuint64m8_t bindex = __riscv_vle64_v_u64m8 (base + 100, vl);
+ for (int i = 0; i < n; i++){
+ vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
+ vint8m1_t v = __riscv_vluxei64_v_i8m1_m(m,base,bindex,vl);
+ v = __riscv_vluxei64_v_i8m1_tu(v,base,bindex,vl);
+ v = __riscv_vle8_v_i8m1_tu (v, base2, vl);
+ __riscv_vse8_v_i8m1 (out + 100*i,v,vl);
+ }
+}
+
+void f12 (void *base,void *out,size_t vl, int n)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 ((void *)(base + 1000), vl);
+ for (int i = 0; i < n; i++){
+ vuint64m1_t bindex = __riscv_vle64_v_u64m1 (base + 100*i, vl);
+ v = __riscv_vluxei64_v_i8mf8_tu(v,base,bindex,vl);
+ v = __riscv_vluxei64_v_i8mf8_tu(v,base,bindex,vl);
+ v = __riscv_vluxei64_v_i8mf8_tu(v,base,bindex,vl);
+ v = __riscv_vluxei64_v_i8mf8_tu(v,base,bindex,vl);
+ v = __riscv_vluxei64_v_i8mf8_tu(v,base,bindex,vl);
+ v = __riscv_vluxei64_v_i8mf8_tu(v,base,bindex,vl);
+ __riscv_vse8_v_i8mf8 (out + 100*i,v,vl);
+ }
+}
+
+void f13 (void *base,void *out,size_t vl, int n)
+{
+ vint8m1_t v = __riscv_vle8_v_i8m1 ((void *)(base + 1000), vl);
+ for (int i = 0; i < n; i++){
+ vuint64m8_t bindex = __riscv_vle64_v_u64m8 (base + 100*i, vl);
+ v = __riscv_vluxei64_v_i8m1_tu(v,base,bindex,vl);
+ v = __riscv_vluxei64_v_i8m1_tu(v,base,bindex,vl);
+ v = __riscv_vluxei64_v_i8m1_tu(v,base,bindex,vl);
+ v = __riscv_vluxei64_v_i8m1_tu(v,base,bindex,vl);
+ v = __riscv_vluxei64_v_i8m1_tu(v,base,bindex,vl);
+ v = __riscv_vluxei64_v_i8m1_tu(v,base,bindex,vl);
+ __riscv_vse8_v_i8m1 (out + 100*i,v,vl);
+ }
+}
+
+void f14 (void *base,void *out,size_t vl, int n)
+{
+ for (int i = 0; i < n; i++){
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 ((void *)(base + 1000 * i), vl);
+ vuint64m1_t bindex = __riscv_vle64_v_u64m1 (base + 100*i, vl);
+ v = __riscv_vluxei64_v_i8mf8_tu(v,base,bindex,vl);
+ v = __riscv_vluxei64_v_i8mf8_tu(v,base,bindex,vl);
+ v = __riscv_vluxei64_v_i8mf8_tu(v,base,bindex,vl);
+ v = __riscv_vluxei64_v_i8mf8_tu(v,base,bindex,vl);
+ v = __riscv_vluxei64_v_i8mf8_tu(v,base,bindex,vl);
+ v = __riscv_vluxei64_v_i8mf8_tu(v,base,bindex,vl);
+ __riscv_vse8_v_i8mf8 (out + 100*i,v,vl);
+ }
+}
+
+void f15 (void *base,void *out,size_t vl, int n)
+{
+ for (int i = 0; i < n; i++){
+ vint8m1_t v = __riscv_vle8_v_i8m1 ((void *)(base + 1000 * i), vl);
+ vuint64m8_t bindex = __riscv_vle64_v_u64m8 (base + 100*i, vl);
+ v = __riscv_vluxei64_v_i8m1_tu(v,base,bindex,vl);
+ v = __riscv_vluxei64_v_i8m1_tu(v,base,bindex,vl);
+ v = __riscv_vluxei64_v_i8m1_tu(v,base,bindex,vl);
+ v = __riscv_vluxei64_v_i8m1_tu(v,base,bindex,vl);
+ v = __riscv_vluxei64_v_i8m1_tu(v,base,bindex,vl);
+ v = __riscv_vluxei64_v_i8m1_tu(v,base,bindex,vl);
+ __riscv_vse8_v_i8m1 (out + 100*i,v,vl);
+ }
+}
+
+void f16 (void *base,void *out,size_t vl, int n)
+{
+ for (int i = 0; i < n; i++){
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 ((void *)(base + 1000 * i), vl);
+ vuint64m1_t bindex1 = __riscv_vle64_v_u64m1 (base + 100*i, vl);
+ vuint64m1_t bindex2 = __riscv_vle64_v_u64m1 (base + 200*i, vl);
+ vuint64m1_t bindex3 = __riscv_vle64_v_u64m1 (base + 300*i, vl);
+ vuint64m1_t bindex4 = __riscv_vle64_v_u64m1 (base + 400*i, vl);
+ vuint64m1_t bindex5 = __riscv_vle64_v_u64m1 (base + 500*i, vl);
+ vuint64m1_t bindex6 = __riscv_vle64_v_u64m1 (base + 600*i, vl);
+ v = __riscv_vluxei64_v_i8mf8_tu(v,base,bindex1,vl);
+ v = __riscv_vluxei64_v_i8mf8_tu(v,base,bindex2,vl);
+ v = __riscv_vluxei64_v_i8mf8_tu(v,base,bindex3,vl);
+ v = __riscv_vluxei64_v_i8mf8_tu(v,base,bindex4,vl);
+ v = __riscv_vluxei64_v_i8mf8_tu(v,base,bindex5,vl);
+ v = __riscv_vluxei64_v_i8mf8_tu(v,base,bindex6,vl);
+ __riscv_vse8_v_i8mf8 (out + 100*i,v,vl);
+ }
+}
+
+void f17 (void *base,void *out,size_t vl, int n)
+{
+ for (int i = 0; i < n; i++){
+ vint8m1_t v = __riscv_vle8_v_i8m1 ((void *)(base + 1000 * i), vl);
+ vuint64m8_t bindex1 = __riscv_vle64_v_u64m8 (base + 100*i, vl);
+ vuint64m8_t bindex2 = __riscv_vle64_v_u64m8 (base + 200*i, vl);
+ vuint64m8_t bindex3 = __riscv_vle64_v_u64m8 (base + 300*i, vl);
+ v = __riscv_vluxei64_v_i8m1_tu(v,base,bindex1,vl);
+ v = __riscv_vluxei64_v_i8m1_tu(v,base,bindex2,vl);
+ v = __riscv_vluxei64_v_i8m1_tu(v,base,bindex3,vl);
+ __riscv_vse8_v_i8m1 (out + 100*i,v,vl);
+ }
+}
+
+void f18 (void *base,void *base2,void *out,size_t vl, int n)
+{
+ vuint64m8_t bindex = __riscv_vle64_v_u64m8 (base + 100, vl);
+ for (int i = 0; i < n; i++){
+ vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
+ vuint32m4_t v = __riscv_vluxei64_v_u32m4_m(m,base,bindex,vl);
+ vuint32m4_t v2 = __riscv_vle32_v_u32m4_tu (v, base2 + i, vl);
+ vint8m1_t v3 = __riscv_vluxei32_v_i8m1_m(m,base,v2,vl);
+ __riscv_vse8_v_i8m1 (out + 100*i,v3,vl);
+ }
+}
+
+void f19 (void *base,void *base2,void *out,size_t vl, int n)
+{
+ vuint64m8_t bindex = __riscv_vle64_v_u64m8 (base + 100, vl);
+ for (int i = 0; i < n; i++){
+ vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
+ vuint64m8_t v = __riscv_vluxei64_v_u64m8_m(m,base,bindex,vl);
+ vuint64m8_t v2 = __riscv_vle64_v_u64m8_tu (v, base2 + i, vl);
+ vint8m1_t v3 = __riscv_vluxei64_v_i8m1_m(m,base,v,vl);
+ vint8m1_t v4 = __riscv_vluxei64_v_i8m1_m(m,base,v2,vl);
+ __riscv_vse8_v_i8m1 (out + 100*i,v3,vl);
+ __riscv_vse8_v_i8m1 (out + 222*i,v4,vl);
+ }
+}
+void f20 (void *base,void *out,size_t vl)
+{
+ vuint64m8_t bindex = __riscv_vle64_v_u64m8 (base, vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23");
+
+ vint8m1_t v = __riscv_vluxei64_v_i8m1(base,bindex,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vse8_v_i8m1 (out,v,vl);
+}
+
+void f21 (void *base,void *out,size_t vl)
+{
+ vuint64m8_t bindex = __riscv_vle64_v_u64m8 (base, vl);
+ vbool8_t m = __riscv_vlm_v_b8 (base, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23");
+
+ vint8m1_t v = __riscv_vluxei64_v_i8m1_m(m,base,bindex,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vse8_v_i8m1 (out,v,vl);
+}
+
+void f22 (void *base,void *out,size_t vl)
+{
+ vuint64m8_t bindex = __riscv_vle64_v_u64m8 (base, vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23");
+
+ vint8m1_t v = __riscv_vluxei64_v_i8m1(base,bindex,vl);
+ asm volatile("#" ::
+ : "v0", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+ v = __riscv_vadd_vv_i8m1 (v,v,vl);
+ asm volatile("#" ::
+ : "v0", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vse8_v_i8m1 (out,v,vl);
+}
+
+/* { dg-final { scan-assembler-times {vmv} 1 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
new file mode 100644
@@ -0,0 +1,133 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (int16_t *base,int8_t *out,size_t vl)
+{
+ vint16m2_t v0 = __riscv_vle16_v_i16m2 (base, vl);
+ vint16m2_t v1 = __riscv_vle16_v_i16m2 ((int16_t *)(base + 100), vl);
+ vbool8_t m = __riscv_vlm_v_b8 ((uint8_t *)(base + 200), vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27");
+
+ m = __riscv_vmadc_vvm_i16m2_b8 (v0, v1, m, 4);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,m,vl);
+}
+
+void f1 (int16_t *base,int8_t *out,size_t vl)
+{
+ vint16m2_t v0 = __riscv_vle16_v_i16m2 (base, vl);
+ vint16m2_t v1 = __riscv_vle16_v_i16m2 ((int16_t *)(base + 100), vl);
+ vbool8_t m = __riscv_vlm_v_b8 ((uint8_t *)(base + 200), vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27");
+
+ m = __riscv_vmadc_vvm_i16m2_b8 (v0, v1, m, 4);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v31");
+
+ __riscv_vsm_v_b8 (out,m,vl);
+}
+
+void f2 (int16_t *base,int8_t *out,size_t vl)
+{
+ vint16m2_t v0 = __riscv_vle16_v_i16m2 (base, vl);
+ vint16m2_t v1 = __riscv_vle16_v_i16m2 ((int16_t *)(base + 100), vl);
+ vbool8_t m = __riscv_vlm_v_b8 ((uint8_t *)(base + 200), vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27");
+
+ m = __riscv_vmadc_vvm_i16m2_b8 (v0, v1, m, 4);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,m,vl);
+}
+
+void f3 (int16_t *base,int8_t *out,size_t vl)
+{
+ vint16mf2_t v0 = __riscv_vle16_v_i16mf2 (base, vl);
+ vint16mf2_t v1 = __riscv_vle16_v_i16mf2 ((int16_t *)(base + 100), vl);
+ vbool32_t m = __riscv_vlm_v_b32 ((uint8_t *)(base + 200), vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29");
+
+ m = __riscv_vmadc_vvm_i16mf2_b32 (v0, v1, m, 4);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30");
+
+ __riscv_vsm_v_b32 (out,m,vl);
+}
+
+void f4 (int16_t *base,int8_t *out,size_t vl)
+{
+ vint16mf2_t v0 = __riscv_vle16_v_i16mf2 (base, vl);
+ vint16mf2_t v1 = __riscv_vle16_v_i16mf2 ((int16_t *)(base + 100), vl);
+ vbool32_t m = __riscv_vlm_v_b32 ((uint8_t *)(base + 200), vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29");
+
+ m = __riscv_vmadc_vvm_i16mf2_b32 (v0, v1, m, 4);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v31");
+
+ __riscv_vsm_v_b32 (out,m,vl);
+}
+
+void f5 (int16_t *base,int8_t *out,size_t vl)
+{
+ vint16mf2_t v0 = __riscv_vle16_v_i16mf2 (base, vl);
+ vint16mf2_t v1 = __riscv_vle16_v_i16mf2 ((int16_t *)(base + 100), vl);
+ vbool32_t m = __riscv_vlm_v_b32 ((uint8_t *)(base + 200), vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29");
+
+ m = __riscv_vmadc_vvm_i16mf2_b32 (v0, v1, m, 4);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v30", "v31");
+
+ __riscv_vsm_v_b32 (out,m,vl);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
new file mode 100644
@@ -0,0 +1,133 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (int16_t *base,int8_t *out,size_t vl)
+{
+ vint16m2_t v0 = __riscv_vle16_v_i16m2 (base, vl);
+ vint16m2_t v1 = __riscv_vle16_v_i16m2 ((int16_t *)(base + 100), vl);
+ vbool8_t m = __riscv_vlm_v_b8 ((uint8_t *)(base + 200), vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27");
+
+ m = __riscv_vmsbc_vvm_i16m2_b8 (v0, v1, m, 4);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,m,vl);
+}
+
+void f1 (int16_t *base,int8_t *out,size_t vl)
+{
+ vint16m2_t v0 = __riscv_vle16_v_i16m2 (base, vl);
+ vint16m2_t v1 = __riscv_vle16_v_i16m2 ((int16_t *)(base + 100), vl);
+ vbool8_t m = __riscv_vlm_v_b8 ((uint8_t *)(base + 200), vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27");
+
+ m = __riscv_vmsbc_vvm_i16m2_b8 (v0, v1, m, 4);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v31");
+
+ __riscv_vsm_v_b8 (out,m,vl);
+}
+
+void f2 (int16_t *base,int8_t *out,size_t vl)
+{
+ vint16m2_t v0 = __riscv_vle16_v_i16m2 (base, vl);
+ vint16m2_t v1 = __riscv_vle16_v_i16m2 ((int16_t *)(base + 100), vl);
+ vbool8_t m = __riscv_vlm_v_b8 ((uint8_t *)(base + 200), vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27");
+
+ m = __riscv_vmsbc_vvm_i16m2_b8 (v0, v1, m, 4);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,m,vl);
+}
+
+void f3 (int16_t *base,int8_t *out,size_t vl)
+{
+ vint16mf2_t v0 = __riscv_vle16_v_i16mf2 (base, vl);
+ vint16mf2_t v1 = __riscv_vle16_v_i16mf2 ((int16_t *)(base + 100), vl);
+ vbool32_t m = __riscv_vlm_v_b32 ((uint8_t *)(base + 200), vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29");
+
+ m = __riscv_vmsbc_vvm_i16mf2_b32 (v0, v1, m, 4);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30");
+
+ __riscv_vsm_v_b32 (out,m,vl);
+}
+
+void f4 (int16_t *base,int8_t *out,size_t vl)
+{
+ vint16mf2_t v0 = __riscv_vle16_v_i16mf2 (base, vl);
+ vint16mf2_t v1 = __riscv_vle16_v_i16mf2 ((int16_t *)(base + 100), vl);
+ vbool32_t m = __riscv_vlm_v_b32 ((uint8_t *)(base + 200), vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29");
+
+ m = __riscv_vmsbc_vvm_i16mf2_b32 (v0, v1, m, 4);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v31");
+
+ __riscv_vsm_v_b32 (out,m,vl);
+}
+
+void f5 (int16_t *base,int8_t *out,size_t vl)
+{
+ vint16mf2_t v0 = __riscv_vle16_v_i16mf2 (base, vl);
+ vint16mf2_t v1 = __riscv_vle16_v_i16mf2 ((int16_t *)(base + 100), vl);
+ vbool32_t m = __riscv_vlm_v_b32 ((uint8_t *)(base + 200), vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29");
+
+ m = __riscv_vmsbc_vvm_i16mf2_b32 (v0, v1, m, 4);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v30", "v31");
+
+ __riscv_vsm_v_b32 (out,m,vl);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
new file mode 100644
@@ -0,0 +1,127 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (int16_t *base,int8_t *out,size_t vl)
+{
+ vint16m2_t v0 = __riscv_vle16_v_i16m2 (base, vl);
+ vint16m2_t v1 = __riscv_vle16_v_i16m2 ((int16_t *)(base + 100), vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27");
+
+ vbool8_t m = __riscv_vmadc_vv_i16m2_b8 (v0, v1,4);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,m,vl);
+}
+
+void f1 (int16_t *base,int8_t *out,size_t vl)
+{
+ vint16m2_t v0 = __riscv_vle16_v_i16m2 (base, vl);
+ vint16m2_t v1 = __riscv_vle16_v_i16m2 ((int16_t *)(base + 100), vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27");
+
+ vbool8_t m = __riscv_vmadc_vv_i16m2_b8 (v0, v1,4);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v31");
+
+ __riscv_vsm_v_b8 (out,m,vl);
+}
+
+void f2 (int16_t *base,int8_t *out,size_t vl)
+{
+ vint16m2_t v0 = __riscv_vle16_v_i16m2 (base, vl);
+ vint16m2_t v1 = __riscv_vle16_v_i16m2 ((int16_t *)(base + 100), vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27");
+
+ vbool8_t m = __riscv_vmadc_vv_i16m2_b8 (v0, v1,4);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,m,vl);
+}
+
+void f3 (int16_t *base,int8_t *out,size_t vl)
+{
+ vint16mf2_t v0 = __riscv_vle16_v_i16mf2 (base, vl);
+ vint16mf2_t v1 = __riscv_vle16_v_i16mf2 ((int16_t *)(base + 100), vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29");
+
+ vbool32_t m = __riscv_vmadc_vv_i16mf2_b32 (v0, v1,4);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30");
+
+ __riscv_vsm_v_b32 (out,m,vl);
+}
+
+void f4 (int16_t *base,int8_t *out,size_t vl)
+{
+ vint16mf2_t v0 = __riscv_vle16_v_i16mf2 (base, vl);
+ vint16mf2_t v1 = __riscv_vle16_v_i16mf2 ((int16_t *)(base + 100), vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29");
+
+ vbool32_t m = __riscv_vmadc_vv_i16mf2_b32 (v0, v1,4);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v31");
+
+ __riscv_vsm_v_b32 (out,m,vl);
+}
+
+void f5 (int16_t *base,int8_t *out,size_t vl)
+{
+ vint16mf2_t v0 = __riscv_vle16_v_i16mf2 (base, vl);
+ vint16mf2_t v1 = __riscv_vle16_v_i16mf2 ((int16_t *)(base + 100), vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29");
+
+ vbool32_t m = __riscv_vmadc_vv_i16mf2_b32 (v0, v1,4);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v30", "v31");
+
+ __riscv_vsm_v_b32 (out,m,vl);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
new file mode 100644
@@ -0,0 +1,127 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (int16_t *base,int8_t *out,size_t vl)
+{
+ vint16m2_t v0 = __riscv_vle16_v_i16m2 (base, vl);
+ vint16m2_t v1 = __riscv_vle16_v_i16m2 ((int16_t *)(base + 100), vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27");
+
+ vbool8_t m = __riscv_vmsbc_vv_i16m2_b8 (v0, v1,4);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,m,vl);
+}
+
+void f1 (int16_t *base,int8_t *out,size_t vl)
+{
+ vint16m2_t v0 = __riscv_vle16_v_i16m2 (base, vl);
+ vint16m2_t v1 = __riscv_vle16_v_i16m2 ((int16_t *)(base + 100), vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27");
+
+ vbool8_t m = __riscv_vmsbc_vv_i16m2_b8 (v0, v1,4);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v31");
+
+ __riscv_vsm_v_b8 (out,m,vl);
+}
+
+void f2 (int16_t *base,int8_t *out,size_t vl)
+{
+ vint16m2_t v0 = __riscv_vle16_v_i16m2 (base, vl);
+ vint16m2_t v1 = __riscv_vle16_v_i16m2 ((int16_t *)(base + 100), vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27");
+
+ vbool8_t m = __riscv_vmsbc_vv_i16m2_b8 (v0, v1,4);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,m,vl);
+}
+
+void f3 (int16_t *base,int8_t *out,size_t vl)
+{
+ vint16mf2_t v0 = __riscv_vle16_v_i16mf2 (base, vl);
+ vint16mf2_t v1 = __riscv_vle16_v_i16mf2 ((int16_t *)(base + 100), vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29");
+
+ vbool32_t m = __riscv_vmsbc_vv_i16mf2_b32 (v0, v1,4);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30");
+
+ __riscv_vsm_v_b32 (out,m,vl);
+}
+
+void f4 (int16_t *base,int8_t *out,size_t vl)
+{
+ vint16mf2_t v0 = __riscv_vle16_v_i16mf2 (base, vl);
+ vint16mf2_t v1 = __riscv_vle16_v_i16mf2 ((int16_t *)(base + 100), vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29");
+
+ vbool32_t m = __riscv_vmsbc_vv_i16mf2_b32 (v0, v1,4);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v31");
+
+ __riscv_vsm_v_b32 (out,m,vl);
+}
+
+void f5 (int16_t *base,int8_t *out,size_t vl)
+{
+ vint16mf2_t v0 = __riscv_vle16_v_i16mf2 (base, vl);
+ vint16mf2_t v1 = __riscv_vle16_v_i16mf2 ((int16_t *)(base + 100), vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29");
+
+ vbool32_t m = __riscv_vmsbc_vv_i16mf2_b32 (v0, v1,4);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v30", "v31");
+
+ __riscv_vsm_v_b32 (out,m,vl);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
new file mode 100644
@@ -0,0 +1,231 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vuint16m8_t v2 = __riscv_vle16_v_u16m8 (base2, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vv_u16m8_b2_m(m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vuint16m8_t v2 = __riscv_vle16_v_u16m8 (base2, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vv_u16m8_b2_mu(m1,m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vuint16m8_t v2 = __riscv_vle16_v_u16m8 (base2, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vv_u16m8_b2_m(m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vuint16m8_t v2 = __riscv_vle16_v_u16m8 (base2, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vv_u16m8_b2_m(m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vv_u16m8_b2_m(m1,v1,v1,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vuint16m8_t v2 = __riscv_vle16_v_u16m8 (base2, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vv_u16m8_b2_m(m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vuint16m8_t v2 = __riscv_vle16_v_u16m8 (base2, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ vbool2_t m2 = __riscv_vlm_v_b2 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vv_u16m8_b2_mu(m1,m2,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f7 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vv_i32m8_b4 (v, v, 4);
+ vbool4_t m4 = __riscv_vmseq_vv_i32m8_b4_mu (m3, m3, v2, v2, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f8 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vv_i32m8_b4 (v, v, 4);
+ vbool4_t m4 = __riscv_vmseq_vv_i32m8_b4_m (m3, v2, v2, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f9 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vv_i32m8_b4 (v, v, 4);
+ vbool4_t m4 = __riscv_vmseq_vv_i32m8_b4_m (m3, v2, v2, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+ vbool4_t m5 = __riscv_vmseq_vv_i32m8_b4_mu (m3, m4, v2, v2, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m5, 4);
+}
+
+void f10 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vv_i32m8_b4 (v, v2, 4);
+ vbool4_t m4 = __riscv_vmseq_vv_i32m8_b4_mu (m3, m3, v2, v, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f11 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vv_i32m8_b4 (v, v2, 4);
+ vbool4_t m4 = __riscv_vmseq_vv_i32m8_b4_m (m3, v2, v, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n)
+{
+ vbool4_t mask = *(vbool4_t*)base1;
+ vint32m8_t v = __riscv_vle32_v_i32m8 (base1, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, base1, 4);
+ mask = __riscv_vmseq_vv_i32m8_b4 (v, v2, 4);
+ for (int i = 0; i < n; i++){
+ vint32m8_t v3 = __riscv_vle32_v_i32m8 (base1 + i, 4);
+ vint32m8_t v4 = __riscv_vle32_v_i32m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmseq_vv_i32m8_b4_m (mask, v3, v4,32);
+ mask = __riscv_vmseq_vv_i32m8_b4_mu (mask, mask, v4, v4, 32);
+ }
+ __riscv_vsm_v_b4 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n)
+{
+ vbool32_t mask = *(vbool32_t*)base1;
+ vint32m1_t v = __riscv_vle32_v_i32m1 (base1, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, base1, 4);
+ mask = __riscv_vmseq_vv_i32m1_b32 (v, v2, 4);
+ for (int i = 0; i < n; i++){
+ vint32m1_t v3 = __riscv_vle32_v_i32m1 (base1 + i, 4);
+ vint32m1_t v4 = __riscv_vle32_v_i32m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmseq_vv_i32m1_b32_m (mask, v3, v4,32);
+ mask = __riscv_vmseq_vv_i32m1_b32_mu (mask, mask, v4, v4, 32);
+ }
+ __riscv_vsm_v_b32 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
new file mode 100644
@@ -0,0 +1,231 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vuint16m8_t v2 = __riscv_vle16_v_u16m8 (base2, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vv_u16m8_b2_m(m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vuint16m8_t v2 = __riscv_vle16_v_u16m8 (base2, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vv_u16m8_b2_mu(m1,m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vuint16m8_t v2 = __riscv_vle16_v_u16m8 (base2, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vv_u16m8_b2_m(m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vuint16m8_t v2 = __riscv_vle16_v_u16m8 (base2, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vv_u16m8_b2_m(m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vv_u16m8_b2_m(m1,v1,v1,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vuint16m8_t v2 = __riscv_vle16_v_u16m8 (base2, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vv_u16m8_b2_m(m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vuint16m8_t v2 = __riscv_vle16_v_u16m8 (base2, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ vbool2_t m2 = __riscv_vlm_v_b2 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vv_u16m8_b2_mu(m1,m2,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f7 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vv_i32m8_b4 (v, v, 4);
+ vbool4_t m4 = __riscv_vmslt_vv_i32m8_b4_mu (m3, m3, v2, v2, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f8 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vv_i32m8_b4 (v, v, 4);
+ vbool4_t m4 = __riscv_vmslt_vv_i32m8_b4_m (m3, v2, v2, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f9 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vv_i32m8_b4 (v, v, 4);
+ vbool4_t m4 = __riscv_vmslt_vv_i32m8_b4_m (m3, v2, v2, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+ vbool4_t m5 = __riscv_vmslt_vv_i32m8_b4_mu (m3, m4, v2, v2, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m5, 4);
+}
+
+void f10 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vv_i32m8_b4 (v, v2, 4);
+ vbool4_t m4 = __riscv_vmslt_vv_i32m8_b4_mu (m3, m3, v2, v, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f11 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vv_i32m8_b4 (v, v2, 4);
+ vbool4_t m4 = __riscv_vmslt_vv_i32m8_b4_m (m3, v2, v, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n)
+{
+ vbool4_t mask = *(vbool4_t*)base1;
+ vint32m8_t v = __riscv_vle32_v_i32m8 (base1, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, base1, 4);
+ mask = __riscv_vmslt_vv_i32m8_b4 (v, v2, 4);
+ for (int i = 0; i < n; i++){
+ vint32m8_t v3 = __riscv_vle32_v_i32m8 (base1 + i, 4);
+ vint32m8_t v4 = __riscv_vle32_v_i32m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmslt_vv_i32m8_b4_m (mask, v3, v4,32);
+ mask = __riscv_vmslt_vv_i32m8_b4_mu (mask, mask, v4, v4, 32);
+ }
+ __riscv_vsm_v_b4 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n)
+{
+ vbool32_t mask = *(vbool32_t*)base1;
+ vint32m1_t v = __riscv_vle32_v_i32m1 (base1, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, base1, 4);
+ mask = __riscv_vmslt_vv_i32m1_b32 (v, v2, 4);
+ for (int i = 0; i < n; i++){
+ vint32m1_t v3 = __riscv_vle32_v_i32m1 (base1 + i, 4);
+ vint32m1_t v4 = __riscv_vle32_v_i32m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmslt_vv_i32m1_b32_m (mask, v3, v4,32);
+ mask = __riscv_vmslt_vv_i32m1_b32_mu (mask, mask, v4, v4, 32);
+ }
+ __riscv_vsm_v_b32 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
new file mode 100644
@@ -0,0 +1,231 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_mu(m1,m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ vbool2_t m2 = __riscv_vlm_v_b2 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_mu(m1,m2,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f7 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vx_i32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmseq_vx_i32m8_b4_mu (m3, m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f8 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vx_i32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmseq_vx_i32m8_b4_m (m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f9 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vx_i32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmseq_vx_i32m8_b4_m (m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+ vbool4_t m5 = __riscv_vmseq_vx_i32m8_b4_mu (m3, m4, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m5, 4);
+}
+
+void f10 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vx_i32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmseq_vx_i32m8_b4_mu (m3, m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f11 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vx_i32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmseq_vx_i32m8_b4_m (m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)base1;
+ vint32m8_t v = __riscv_vle32_v_i32m8 (base1, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, base1, 4);
+ mask = __riscv_vmseq_vx_i32m8_b4 (v, x, 4);
+ for (int i = 0; i < n; i++){
+ vint32m8_t v3 = __riscv_vle32_v_i32m8 (base1 + i, 4);
+ vint32m8_t v4 = __riscv_vle32_v_i32m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmseq_vx_i32m8_b4_m (mask, v3, x,32);
+ mask = __riscv_vmseq_vx_i32m8_b4_mu (mask, mask, v4, x, 32);
+ }
+ __riscv_vsm_v_b4 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)base1;
+ vint32m1_t v = __riscv_vle32_v_i32m1 (base1, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, base1, 4);
+ mask = __riscv_vmseq_vx_i32m1_b32 (v, x, 4);
+ for (int i = 0; i < n; i++){
+ vint32m1_t v3 = __riscv_vle32_v_i32m1 (base1 + i, 4);
+ vint32m1_t v4 = __riscv_vle32_v_i32m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmseq_vx_i32m1_b32_m (mask, v3, x,32);
+ mask = __riscv_vmseq_vx_i32m1_b32_mu (mask, mask, v4, x, 32);
+ }
+ __riscv_vsm_v_b32 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
new file mode 100644
@@ -0,0 +1,231 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_mu(m1,m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ vbool2_t m2 = __riscv_vlm_v_b2 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_mu(m1,m2,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f7 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vx_i32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmslt_vx_i32m8_b4_mu (m3, m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f8 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vx_i32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmslt_vx_i32m8_b4_m (m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f9 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vx_i32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmslt_vx_i32m8_b4_m (m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+ vbool4_t m5 = __riscv_vmslt_vx_i32m8_b4_mu (m3, m4, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m5, 4);
+}
+
+void f10 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vx_i32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmslt_vx_i32m8_b4_mu (m3, m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f11 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vx_i32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmslt_vx_i32m8_b4_m (m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)base1;
+ vint32m8_t v = __riscv_vle32_v_i32m8 (base1, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, base1, 4);
+ mask = __riscv_vmslt_vx_i32m8_b4 (v, x, 4);
+ for (int i = 0; i < n; i++){
+ vint32m8_t v3 = __riscv_vle32_v_i32m8 (base1 + i, 4);
+ vint32m8_t v4 = __riscv_vle32_v_i32m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmslt_vx_i32m8_b4_m (mask, v3, x, 32);
+ mask = __riscv_vmslt_vx_i32m8_b4_mu (mask, mask, v4, x, 32);
+ }
+ __riscv_vsm_v_b4 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)base1;
+ vint32m1_t v = __riscv_vle32_v_i32m1 (base1, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, base1, 4);
+ mask = __riscv_vmslt_vx_i32m1_b32 (v, x, 4);
+ for (int i = 0; i < n; i++){
+ vint32m1_t v3 = __riscv_vle32_v_i32m1 (base1 + i, 4);
+ vint32m1_t v4 = __riscv_vle32_v_i32m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmslt_vx_i32m1_b32_m (mask, v3, x, 32);
+ mask = __riscv_vmslt_vx_i32m1_b32_mu (mask, mask, v4, x, 32);
+ }
+ __riscv_vsm_v_b32 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
new file mode 100644
@@ -0,0 +1,231 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_m(m1,v1, -16,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_mu(m1,m1,v1, -16,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_m(m1,v1, -16,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_m(m1,v1, -16,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_m(m1,v1, -16,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_m(m1,v1, -16,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ vbool2_t m2 = __riscv_vlm_v_b2 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_mu(m1,m2,v1, -16,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f7 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vx_i32m8_b4 (v, -16, 4);
+ vbool4_t m4 = __riscv_vmseq_vx_i32m8_b4_mu (m3, m3, v2, -16, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f8 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vx_i32m8_b4 (v, -16, 4);
+ vbool4_t m4 = __riscv_vmseq_vx_i32m8_b4_m (m3, v2, -16, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f9 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vx_i32m8_b4 (v, -16, 4);
+ vbool4_t m4 = __riscv_vmseq_vx_i32m8_b4_m (m3, v2, -16, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+ vbool4_t m5 = __riscv_vmseq_vx_i32m8_b4_mu (m3, m4, v2, -16, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m5, 4);
+}
+
+void f10 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vx_i32m8_b4 (v, -16, 4);
+ vbool4_t m4 = __riscv_vmseq_vx_i32m8_b4_mu (m3, m3, v2, -16, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f11 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vx_i32m8_b4 (v, -16, 4);
+ vbool4_t m4 = __riscv_vmseq_vx_i32m8_b4_m (m3, v2, -16, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)base1;
+ vint32m8_t v = __riscv_vle32_v_i32m8 (base1, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, base1, 4);
+ mask = __riscv_vmseq_vx_i32m8_b4 (v, -16, 4);
+ for (int i = 0; i < n; i++){
+ vint32m8_t v3 = __riscv_vle32_v_i32m8 (base1 + i, 4);
+ vint32m8_t v4 = __riscv_vle32_v_i32m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmseq_vx_i32m8_b4_m (mask, v3, -16, 32);
+ mask = __riscv_vmseq_vx_i32m8_b4_mu (mask, mask, v4, -16, 32);
+ }
+ __riscv_vsm_v_b4 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)base1;
+ vint32m1_t v = __riscv_vle32_v_i32m1 (base1, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, base1, 4);
+ mask = __riscv_vmseq_vx_i32m1_b32 (v, -16, 4);
+ for (int i = 0; i < n; i++){
+ vint32m1_t v3 = __riscv_vle32_v_i32m1 (base1 + i, 4);
+ vint32m1_t v4 = __riscv_vle32_v_i32m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmseq_vx_i32m1_b32_m (mask, v3, -16, 32);
+ mask = __riscv_vmseq_vx_i32m1_b32_mu (mask, mask, v4, -16, 32);
+ }
+ __riscv_vsm_v_b32 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
new file mode 100644
@@ -0,0 +1,231 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_m(m1,v1, -15,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_mu(m1,m1,v1, -15,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_m(m1,v1, -15,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_m(m1,v1, -15,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_m(m1,v1, -15,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_m(m1,v1, -15,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ vbool2_t m2 = __riscv_vlm_v_b2 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_mu(m1,m2,v1, -15,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f7 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vx_i32m8_b4 (v, -15,4);
+ vbool4_t m4 = __riscv_vmslt_vx_i32m8_b4_mu (m3, m3, v2, -15,4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f8 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vx_i32m8_b4 (v, -15,4);
+ vbool4_t m4 = __riscv_vmslt_vx_i32m8_b4_m (m3, v2, -15,4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f9 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vx_i32m8_b4 (v, -15,4);
+ vbool4_t m4 = __riscv_vmslt_vx_i32m8_b4_m (m3, v2, -15,4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+ vbool4_t m5 = __riscv_vmslt_vx_i32m8_b4_mu (m3, m4, v2, -15,4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m5, 4);
+}
+
+void f10 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vx_i32m8_b4 (v, -15,4);
+ vbool4_t m4 = __riscv_vmslt_vx_i32m8_b4_mu (m3, m3, v2, -15,4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f11 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vx_i32m8_b4 (v, -15,4);
+ vbool4_t m4 = __riscv_vmslt_vx_i32m8_b4_m (m3, v2, -15,4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)base1;
+ vint32m8_t v = __riscv_vle32_v_i32m8 (base1, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, base1, 4);
+ mask = __riscv_vmslt_vx_i32m8_b4 (v, -15,4);
+ for (int i = 0; i < n; i++){
+ vint32m8_t v3 = __riscv_vle32_v_i32m8 (base1 + i, 4);
+ vint32m8_t v4 = __riscv_vle32_v_i32m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmslt_vx_i32m8_b4_m (mask, v3, -15,32);
+ mask = __riscv_vmslt_vx_i32m8_b4_mu (mask, mask, v4, -15,32);
+ }
+ __riscv_vsm_v_b4 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)base1;
+ vint32m1_t v = __riscv_vle32_v_i32m1 (base1, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, base1, 4);
+ mask = __riscv_vmslt_vx_i32m1_b32 (v, -15,4);
+ for (int i = 0; i < n; i++){
+ vint32m1_t v3 = __riscv_vle32_v_i32m1 (base1 + i, 4);
+ vint32m1_t v4 = __riscv_vle32_v_i32m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmslt_vx_i32m1_b32_m (mask, v3, -15,32);
+ mask = __riscv_vmslt_vx_i32m1_b32_mu (mask, mask, v4, -15,32);
+ }
+ __riscv_vsm_v_b32 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
new file mode 100644
@@ -0,0 +1,231 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_mu(m1,m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ vbool8_t m2 = __riscv_vlm_v_b8 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_mu(m1,m2,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f7 (void * in, void *out, int32_t x)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, x, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_mu (m3, m3, v2, x, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f8 (void * in, void *out, int32_t x)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, x, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_m (m3, v2, x, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f9 (void * in, void *out, int32_t x)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, x, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_m (m3, v2, x, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+ vbool8_t m5 = __riscv_vmseq_vx_i64m8_b8_mu (m3, m4, v2, x, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m5, 4);
+}
+
+void f10 (void * in, void *out, int32_t x)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, x, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_mu (m3, m3, v2, x, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f11 (void * in, void *out, int32_t x)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, x, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_m (m3, v2, x, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n, int32_t x)
+{
+ vbool8_t mask = *(vbool8_t*)base1;
+ vint64m8_t v = __riscv_vle64_v_i64m8 (base1, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, base1, 4);
+ mask = __riscv_vmseq_vx_i64m8_b8 (v, x, 4);
+ for (int i = 0; i < n; i++){
+ vint64m8_t v3 = __riscv_vle64_v_i64m8 (base1 + i, 4);
+ vint64m8_t v4 = __riscv_vle64_v_i64m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmseq_vx_i64m8_b8_m (mask, v3, x, 32);
+ mask = __riscv_vmseq_vx_i64m8_b8_mu (mask, mask, v4, x, 32);
+ }
+ __riscv_vsm_v_b8 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n, int32_t x)
+{
+ vbool64_t mask = *(vbool64_t*)base1;
+ vint64m1_t v = __riscv_vle64_v_i64m1 (base1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_m (mask, base1, 4);
+ mask = __riscv_vmseq_vx_i64m1_b64 (v, x, 4);
+ for (int i = 0; i < n; i++){
+ vint64m1_t v3 = __riscv_vle64_v_i64m1 (base1 + i, 4);
+ vint64m1_t v4 = __riscv_vle64_v_i64m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmseq_vx_i64m1_b64_m (mask, v3, x, 32);
+ mask = __riscv_vmseq_vx_i64m1_b64_mu (mask, mask, v4, x, 32);
+ }
+ __riscv_vsm_v_b64 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
new file mode 100644
@@ -0,0 +1,231 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_mu(m1,m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ vbool8_t m2 = __riscv_vlm_v_b8 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_mu(m1,m2,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f7 (void * in, void *out, int32_t x)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, x, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_mu (m3, m3, v2, x, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f8 (void * in, void *out, int32_t x)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, x, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_m (m3, v2, x, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f9 (void * in, void *out, int32_t x)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, x, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_m (m3, v2, x, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+ vbool8_t m5 = __riscv_vmslt_vx_i64m8_b8_mu (m3, m4, v2, x, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m5, 4);
+}
+
+void f10 (void * in, void *out, int32_t x)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, x, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_mu (m3, m3, v2, x, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f11 (void * in, void *out, int32_t x)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, x, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_m (m3, v2, x, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n, int32_t x)
+{
+ vbool8_t mask = *(vbool8_t*)base1;
+ vint64m8_t v = __riscv_vle64_v_i64m8 (base1, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, base1, 4);
+ mask = __riscv_vmslt_vx_i64m8_b8 (v, x, 4);
+ for (int i = 0; i < n; i++){
+ vint64m8_t v3 = __riscv_vle64_v_i64m8 (base1 + i, 4);
+ vint64m8_t v4 = __riscv_vle64_v_i64m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmslt_vx_i64m8_b8_m (mask, v3, x, 32);
+ mask = __riscv_vmslt_vx_i64m8_b8_mu (mask, mask, v4, x, 32);
+ }
+ __riscv_vsm_v_b8 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n, int32_t x)
+{
+ vbool64_t mask = *(vbool64_t*)base1;
+ vint64m1_t v = __riscv_vle64_v_i64m1 (base1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_m (mask, base1, 4);
+ mask = __riscv_vmslt_vx_i64m1_b64 (v, x, 4);
+ for (int i = 0; i < n; i++){
+ vint64m1_t v3 = __riscv_vle64_v_i64m1 (base1 + i, 4);
+ vint64m1_t v4 = __riscv_vle64_v_i64m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmslt_vx_i64m1_b64_m (mask, v3, x, 32);
+ mask = __riscv_vmslt_vx_i64m1_b64_mu (mask, mask, v4, x, 32);
+ }
+ __riscv_vsm_v_b64 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
new file mode 100644
@@ -0,0 +1,231 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,-16,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_mu(m1,m1,v1,-16,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,-16,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,-16,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,-16,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,-16,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ vbool8_t m2 = __riscv_vlm_v_b8 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_mu(m1,m2,v1,-16,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f7 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, -16, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_mu (m3, m3, v2, -16, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f8 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, -16, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_m (m3, v2, -16, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f9 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, -16, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_m (m3, v2, -16, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+ vbool8_t m5 = __riscv_vmseq_vx_i64m8_b8_mu (m3, m4, v2, -16, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m5, 4);
+}
+
+void f10 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, -16, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_mu (m3, m3, v2, -16, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f11 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, -16, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_m (m3, v2, -16, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n)
+{
+ vbool8_t mask = *(vbool8_t*)base1;
+ vint64m8_t v = __riscv_vle64_v_i64m8 (base1, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, base1, 4);
+ mask = __riscv_vmseq_vx_i64m8_b8 (v, -16, 4);
+ for (int i = 0; i < n; i++){
+ vint64m8_t v3 = __riscv_vle64_v_i64m8 (base1 + i, 4);
+ vint64m8_t v4 = __riscv_vle64_v_i64m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmseq_vx_i64m8_b8_m (mask, v3, -16, 32);
+ mask = __riscv_vmseq_vx_i64m8_b8_mu (mask, mask, v4, -16, 32);
+ }
+ __riscv_vsm_v_b8 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n)
+{
+ vbool64_t mask = *(vbool64_t*)base1;
+ vint64m1_t v = __riscv_vle64_v_i64m1 (base1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_m (mask, base1, 4);
+ mask = __riscv_vmseq_vx_i64m1_b64 (v, -16, 4);
+ for (int i = 0; i < n; i++){
+ vint64m1_t v3 = __riscv_vle64_v_i64m1 (base1 + i, 4);
+ vint64m1_t v4 = __riscv_vle64_v_i64m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmseq_vx_i64m1_b64_m (mask, v3, -16, 32);
+ mask = __riscv_vmseq_vx_i64m1_b64_mu (mask, mask, v4, -16, 32);
+ }
+ __riscv_vsm_v_b64 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
new file mode 100644
@@ -0,0 +1,231 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,-15,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_mu(m1,m1,v1,-15,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,-15,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,-15,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,-15,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,-15,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ vbool8_t m2 = __riscv_vlm_v_b8 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_mu(m1,m2,v1,-15,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f7 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, -15, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_mu (m3, m3, v2, -15, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f8 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, -15, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_m (m3, v2, -15, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f9 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, -15, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_m (m3, v2, -15, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+ vbool8_t m5 = __riscv_vmslt_vx_i64m8_b8_mu (m3, m4, v2, -15, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m5, 4);
+}
+
+void f10 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, -15, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_mu (m3, m3, v2, -15, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f11 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, -15, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_m (m3, v2, -15, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n)
+{
+ vbool8_t mask = *(vbool8_t*)base1;
+ vint64m8_t v = __riscv_vle64_v_i64m8 (base1, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, base1, 4);
+ mask = __riscv_vmslt_vx_i64m8_b8 (v, -15, 4);
+ for (int i = 0; i < n; i++){
+ vint64m8_t v3 = __riscv_vle64_v_i64m8 (base1 + i, 4);
+ vint64m8_t v4 = __riscv_vle64_v_i64m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmslt_vx_i64m8_b8_m (mask, v3, -15,32);
+ mask = __riscv_vmslt_vx_i64m8_b8_mu (mask, mask, v4, -15, 32);
+ }
+ __riscv_vsm_v_b8 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n)
+{
+ vbool64_t mask = *(vbool64_t*)base1;
+ vint64m1_t v = __riscv_vle64_v_i64m1 (base1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_m (mask, base1, 4);
+ mask = __riscv_vmslt_vx_i64m1_b64 (v, -15, 4);
+ for (int i = 0; i < n; i++){
+ vint64m1_t v3 = __riscv_vle64_v_i64m1 (base1 + i, 4);
+ vint64m1_t v4 = __riscv_vle64_v_i64m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmslt_vx_i64m1_b64_m (mask, v3, -15,32);
+ mask = __riscv_vmslt_vx_i64m1_b64_mu (mask, mask, v4, -15, 32);
+ }
+ __riscv_vsm_v_b64 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
new file mode 100644
@@ -0,0 +1,231 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
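+/* Masked vmseq.vx tests.  f0-f6 clobber most of v0-v31 around an
+   LMUL-8 masked compare and store the resulting mask; f7-f13 chain
+   _m/_mu compares and reuse the result masks (f13 uses LMUL 1).
+   The dg-final directives at the end of the file require that the
+   generated code contain no vmv and no csrr instructions.  */
+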
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_mu(m1,m1,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ vbool8_t m2 = __riscv_vlm_v_b8 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_mu(m1,m2,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f7 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, 0xAAAA, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_mu (m3, m3, v2, 0xAAAA, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f8 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, 0xAAAA, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_m (m3, v2, 0xAAAA, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f9 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, 0xAAAA, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_m (m3, v2, 0xAAAA, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+ vbool8_t m5 = __riscv_vmseq_vx_i64m8_b8_mu (m3, m4, v2, 0xAAAA, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m5, 4);
+}
+
+void f10 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, 0xAAAA, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_mu (m3, m3, v2, 0xAAAA, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f11 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, 0xAAAA, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_m (m3, v2, 0xAAAA, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n)
+{
+ vbool8_t mask = *(vbool8_t*)base1;
+ vint64m8_t v = __riscv_vle64_v_i64m8 (base1, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, base1, 4);
+ mask = __riscv_vmseq_vx_i64m8_b8 (v, 0xAAAA, 4);
+ for (int i = 0; i < n; i++){
+ vint64m8_t v3 = __riscv_vle64_v_i64m8 (base1 + i, 4);
+ vint64m8_t v4 = __riscv_vle64_v_i64m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmseq_vx_i64m8_b8_m (mask, v3, 0xAAAA,32);
+ mask = __riscv_vmseq_vx_i64m8_b8_mu (mask, mask, v4, 0xAAAA, 32);
+ }
+ __riscv_vsm_v_b8 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n)
+{
+ vbool64_t mask = *(vbool64_t*)base1;
+ vint64m1_t v = __riscv_vle64_v_i64m1 (base1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_m (mask, base1, 4);
+ mask = __riscv_vmseq_vx_i64m1_b64 (v, 0xAAAA, 4);
+ for (int i = 0; i < n; i++){
+ vint64m1_t v3 = __riscv_vle64_v_i64m1 (base1 + i, 4);
+ vint64m1_t v4 = __riscv_vle64_v_i64m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmseq_vx_i64m1_b64_m (mask, v3, 0xAAAA,32);
+ mask = __riscv_vmseq_vx_i64m1_b64_mu (mask, mask, v4, 0xAAAA, 32);
+ }
+ __riscv_vsm_v_b64 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
new file mode 100644
@@ -0,0 +1,231 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
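+/* Masked vmsltu.vx/vmslt.vx tests.  f0-f6 clobber most of v0-v31
+   around an LMUL-8 unsigned masked compare and store the resulting
+   mask; f7-f13 chain signed _m/_mu compares and reuse the result
+   masks (f13 uses LMUL 1).  The dg-final directives at the end of
+   the file require that the generated code contain no vmv and no
+   csrr instructions.  */
+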
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_mu(m1,m1,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ vbool8_t m2 = __riscv_vlm_v_b8 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_mu(m1,m2,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f7 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, 0xAAAA, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_mu (m3, m3, v2, 0xAAAA, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f8 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, 0xAAAA, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_m (m3, v2, 0xAAAA, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f9 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, 0xAAAA, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_m (m3, v2, 0xAAAA, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+ vbool8_t m5 = __riscv_vmslt_vx_i64m8_b8_mu (m3, m4, v2, 0xAAAA, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m5, 4);
+}
+
+void f10 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, 0xAAAA, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_mu (m3, m3, v2, 0xAAAA, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f11 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, 0xAAAA, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_m (m3, v2, 0xAAAA, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n)
+{
+ vbool8_t mask = *(vbool8_t*)base1;
+ vint64m8_t v = __riscv_vle64_v_i64m8 (base1, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, base1, 4);
+ mask = __riscv_vmslt_vx_i64m8_b8 (v, 0xAAAA, 4);
+ for (int i = 0; i < n; i++){
+ vint64m8_t v3 = __riscv_vle64_v_i64m8 (base1 + i, 4);
+ vint64m8_t v4 = __riscv_vle64_v_i64m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmslt_vx_i64m8_b8_m (mask, v3, 0xAAAA,32);
+ mask = __riscv_vmslt_vx_i64m8_b8_mu (mask, mask, v4, 0xAAAA, 32);
+ }
+ __riscv_vsm_v_b8 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n)
+{
+ vbool64_t mask = *(vbool64_t*)base1;
+ vint64m1_t v = __riscv_vle64_v_i64m1 (base1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_m (mask, base1, 4);
+ mask = __riscv_vmslt_vx_i64m1_b64 (v, 0xAAAA, 4);
+ for (int i = 0; i < n; i++){
+ vint64m1_t v3 = __riscv_vle64_v_i64m1 (base1 + i, 4);
+ vint64m1_t v4 = __riscv_vle64_v_i64m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmslt_vx_i64m1_b64_m (mask, v3, 0xAAAA,32);
+ mask = __riscv_vmslt_vx_i64m1_b64_mu (mask, mask, v4, 0xAAAA, 32);
+ }
+ __riscv_vsm_v_b64 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
new file mode 100644
@@ -0,0 +1,231 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
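+/* Masked vmfeq.vv tests.  f0-f6 clobber most of v0-v31 around an
+   LMUL-8 masked compare of two float vectors and store the resulting
+   mask; f7-f13 chain _m/_mu compares and reuse the result masks
+   (f13 uses LMUL 1).  The dg-final directives at the end of the file
+   require that the generated code contain no vmv and no csrr
+   instructions.  */
+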
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8 (base2, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vv_f32m8_b4_m(m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8 (base2, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vv_f32m8_b4_mu(m1,m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8 (base2, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vv_f32m8_b4_m(m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8 (base2, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vv_f32m8_b4_m(m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vv_f32m8_b4_m(m1,v1,v1,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8 (base2, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vv_f32m8_b4_m(m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8 (base2, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ vbool4_t m2 = __riscv_vlm_v_b4 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vv_f32m8_b4_mu(m1,m2,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f7 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmfeq_vv_f32m8_b4 (v, v, 4);
+ vbool4_t m4 = __riscv_vmfeq_vv_f32m8_b4_mu (m3, m3, v2, v2, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f8 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmfeq_vv_f32m8_b4 (v, v, 4);
+ vbool4_t m4 = __riscv_vmfeq_vv_f32m8_b4_m (m3, v2, v2, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f9 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmfeq_vv_f32m8_b4 (v, v, 4);
+ vbool4_t m4 = __riscv_vmfeq_vv_f32m8_b4_m (m3, v2, v2, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+ vbool4_t m5 = __riscv_vmfeq_vv_f32m8_b4_mu (m3, m4, v2, v2, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m5, 4);
+}
+
+void f10 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmfeq_vv_f32m8_b4 (v, v2, 4);
+ vbool4_t m4 = __riscv_vmfeq_vv_f32m8_b4_mu (m3, m3, v2, v, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f11 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmfeq_vv_f32m8_b4 (v, v2, 4);
+ vbool4_t m4 = __riscv_vmfeq_vv_f32m8_b4_m (m3, v2, v, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n)
+{
+ vbool4_t mask = *(vbool4_t*)base1;
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (base1, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, base1, 4);
+ mask = __riscv_vmfeq_vv_f32m8_b4 (v, v2, 4);
+ for (int i = 0; i < n; i++){
+ vfloat32m8_t v3 = __riscv_vle32_v_f32m8 (base1 + i, 4);
+ vfloat32m8_t v4 = __riscv_vle32_v_f32m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmfeq_vv_f32m8_b4_m (mask, v3, v4,32);
+ mask = __riscv_vmfeq_vv_f32m8_b4_mu (mask, mask, v4, v4, 32);
+ }
+ __riscv_vsm_v_b4 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n)
+{
+ vbool32_t mask = *(vbool32_t*)base1;
+ vfloat32m1_t v = __riscv_vle32_v_f32m1 (base1, 4);
+ vfloat32m1_t v2 = __riscv_vle32_v_f32m1_m (mask, base1, 4);
+ mask = __riscv_vmfeq_vv_f32m1_b32 (v, v2, 4);
+ for (int i = 0; i < n; i++){
+ vfloat32m1_t v3 = __riscv_vle32_v_f32m1 (base1 + i, 4);
+ vfloat32m1_t v4 = __riscv_vle32_v_f32m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmfeq_vv_f32m1_b32_m (mask, v3, v4,32);
+ mask = __riscv_vmfeq_vv_f32m1_b32_mu (mask, mask, v4, v4, 32);
+ }
+ __riscv_vsm_v_b32 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
new file mode 100644
@@ -0,0 +1,231 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
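+/* Masked vmfeq.vf tests with a scalar float operand.  f0-f6 clobber
+   most of v0-v31 around an LMUL-8 masked compare and store the
+   resulting mask; f7-f13 chain _m/_mu compares and reuse the result
+   masks (f13 uses LMUL 1).  The dg-final directives at the end of
+   the file require that the generated code contain no vmv and no
+   csrr instructions.  */
+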
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vf_f32m8_b4_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vf_f32m8_b4_mu(m1,m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vf_f32m8_b4_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vf_f32m8_b4_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vf_f32m8_b4_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vf_f32m8_b4_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ vbool4_t m2 = __riscv_vlm_v_b4 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vf_f32m8_b4_mu(m1,m2,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f7 (void * in, void *out, float x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmfeq_vf_f32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmfeq_vf_f32m8_b4_mu (m3, m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f8 (void * in, void *out, float x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmfeq_vf_f32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmfeq_vf_f32m8_b4_m (m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f9 (void * in, void *out, float x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmfeq_vf_f32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmfeq_vf_f32m8_b4_m (m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+ vbool4_t m5 = __riscv_vmfeq_vf_f32m8_b4_mu (m3, m4, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m5, 4);
+}
+
+void f10 (void * in, void *out, float x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmfeq_vf_f32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmfeq_vf_f32m8_b4_mu (m3, m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f11 (void * in, void *out, float x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmfeq_vf_f32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmfeq_vf_f32m8_b4_m (m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n, float x)
+{
+ vbool4_t mask = *(vbool4_t*)base1;
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (base1, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, base1, 4);
+ mask = __riscv_vmfeq_vf_f32m8_b4 (v, x, 4);
+ for (int i = 0; i < n; i++){
+ vfloat32m8_t v3 = __riscv_vle32_v_f32m8 (base1 + i, 4);
+ vfloat32m8_t v4 = __riscv_vle32_v_f32m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmfeq_vf_f32m8_b4_m (mask, v3, x,32);
+ mask = __riscv_vmfeq_vf_f32m8_b4_mu (mask, mask, v4, x, 32);
+ }
+ __riscv_vsm_v_b4 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n, float x)
+{
+ vbool32_t mask = *(vbool32_t*)base1;
+ vfloat32m1_t v = __riscv_vle32_v_f32m1 (base1, 4);
+ vfloat32m1_t v2 = __riscv_vle32_v_f32m1_m (mask, base1, 4);
+ mask = __riscv_vmfeq_vf_f32m1_b32 (v, x, 4);
+ for (int i = 0; i < n; i++){
+ vfloat32m1_t v3 = __riscv_vle32_v_f32m1 (base1 + i, 4);
+ vfloat32m1_t v4 = __riscv_vle32_v_f32m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmfeq_vf_f32m1_b32_m (mask, v3, x,32);
+ mask = __riscv_vmfeq_vf_f32m1_b32_mu (mask, mask, v4, x, 32);
+ }
+ __riscv_vsm_v_b32 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
new file mode 100644
@@ -0,0 +1,231 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
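+/* Masked vmflt.vf tests with a scalar float operand.  f0-f6 clobber
+   most of v0-v31 around an LMUL-8 masked compare and store the
+   resulting mask; f7-f13 chain _m/_mu compares and reuse the result
+   masks (f13 uses LMUL 1).  The dg-final directives at the end of
+   the file require that the generated code contain no vmv and no
+   csrr instructions.  */
+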
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmflt_vf_f32m8_b4_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmflt_vf_f32m8_b4_mu(m1,m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmflt_vf_f32m8_b4_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmflt_vf_f32m8_b4_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmflt_vf_f32m8_b4_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmflt_vf_f32m8_b4_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ vbool4_t m2 = __riscv_vlm_v_b4 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmflt_vf_f32m8_b4_mu(m1,m2,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f7 (void * in, void *out, float x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmflt_vf_f32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmflt_vf_f32m8_b4_mu (m3, m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f8 (void * in, void *out, float x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmflt_vf_f32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmflt_vf_f32m8_b4_m (m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f9 (void * in, void *out, float x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmflt_vf_f32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmflt_vf_f32m8_b4_m (m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+ vbool4_t m5 = __riscv_vmflt_vf_f32m8_b4_mu (m3, m4, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m5, 4);
+}
+
+void f10 (void * in, void *out, float x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmflt_vf_f32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmflt_vf_f32m8_b4_mu (m3, m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f11 (void * in, void *out, float x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmflt_vf_f32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmflt_vf_f32m8_b4_m (m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n, float x)
+{
+ vbool4_t mask = *(vbool4_t*)base1;
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (base1, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, base1, 4);
+ mask = __riscv_vmflt_vf_f32m8_b4 (v, x, 4);
+ for (int i = 0; i < n; i++){
+ vfloat32m8_t v3 = __riscv_vle32_v_f32m8 (base1 + i, 4);
+ vfloat32m8_t v4 = __riscv_vle32_v_f32m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmflt_vf_f32m8_b4_m (mask, v3, x,32);
+ mask = __riscv_vmflt_vf_f32m8_b4_mu (mask, mask, v4, x, 32);
+ }
+ __riscv_vsm_v_b4 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n, float x)
+{
+ vbool32_t mask = *(vbool32_t*)base1;
+ vfloat32m1_t v = __riscv_vle32_v_f32m1 (base1, 4);
+ vfloat32m1_t v2 = __riscv_vle32_v_f32m1_m (mask, base1, 4);
+ mask = __riscv_vmflt_vf_f32m1_b32 (v, x, 4);
+ for (int i = 0; i < n; i++){
+ vfloat32m1_t v3 = __riscv_vle32_v_f32m1 (base1 + i, 4);
+ vfloat32m1_t v4 = __riscv_vle32_v_f32m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmflt_vf_f32m1_b32_m (mask, v3, x,32);
+ mask = __riscv_vmflt_vf_f32m1_b32_mu (mask, mask, v4, x, 32);
+ }
+ __riscv_vsm_v_b32 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */