@@ -162,7 +162,20 @@
(and (match_code "const_vector")
(match_test "op == CONSTM1_RTX (GET_MODE (op))")))
-(define_constraint "Wdm"
+(define_memory_constraint "Wdm"
"Vector duplicate memory operand"
- (and (match_operand 0 "memory_operand")
+ (and (match_code "mem")
(match_code "reg" "0")))
+
+;; (vec_duplicate:V (const_int 2863311530 [0xaaaaaaaa])) of pred_broadcast
+;; is CSEed into (const_vector:V (const_int 2863311530 [0xaaaaaaaa])) here,
+;; which is not the pattern we want to match, since we can't generate an
+;; instruction directly for it when SEW = 64 and !TARGET_64BIT. We should
+;; not allow RA (register allocation) to allocate a DImode register in
+;; the pred_broadcast pattern.
+(define_constraint "Wbr"
+ "@internal
+ Broadcast register operand"
+ (and (match_code "reg")
+ (match_test "REGNO (op) <= GP_REG_LAST
+ && direct_broadcast_operand (op, GET_MODE (op))")))
@@ -272,9 +272,16 @@
(ior (match_operand 0 "register_operand")
(match_operand 0 "memory_operand")))
+(define_predicate "reg_or_int_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "const_int_operand")))
+
(define_predicate "vector_move_operand"
(ior (match_operand 0 "nonimmediate_operand")
- (match_code "const_vector")))
+ (and (match_code "const_vector")
+ (match_test "reload_completed
+ || satisfies_constraint_vi (op)
+ || satisfies_constraint_Wc0 (op)"))))
(define_predicate "vector_mask_operand"
(ior (match_operand 0 "register_operand")
@@ -315,8 +322,11 @@
;; The scalar operand can be directly broadcast by RVV instructions.
(define_predicate "direct_broadcast_operand"
- (ior (match_operand 0 "register_operand")
- (match_test "satisfies_constraint_Wdm (op)")))
+ (and (match_test "!(reload_completed && !FLOAT_MODE_P (GET_MODE (op))
+ && register_operand (op, GET_MODE (op))
+ && maybe_gt (GET_MODE_BITSIZE (GET_MODE (op)), GET_MODE_BITSIZE (Pmode)))")
+ (ior (match_operand 0 "register_operand")
+ (match_test "satisfies_constraint_Wdm (op)"))))
;; A CONST_INT operand that has exactly two bits cleared.
(define_predicate "const_nottwobits_operand"
@@ -149,7 +149,8 @@ extern tree builtin_decl (unsigned, bool);
extern rtx expand_builtin (unsigned int, tree, rtx);
extern bool const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
extern bool legitimize_move (rtx, rtx, machine_mode);
-extern void emit_pred_op (unsigned, rtx, rtx, machine_mode);
+extern void emit_vlmax_op (unsigned, rtx, rtx, machine_mode);
+extern void emit_nonvlmax_op (unsigned, rtx, rtx, rtx, machine_mode);
extern enum vlmul_type get_vlmul (machine_mode);
extern unsigned int get_ratio (machine_mode);
extern int get_ta (rtx);
@@ -173,6 +174,11 @@ enum tail_policy get_prefer_tail_policy ();
enum mask_policy get_prefer_mask_policy ();
rtx get_avl_type_rtx (enum avl_type);
opt_machine_mode get_vector_mode (scalar_mode, poly_uint64);
+extern bool simm32_p (rtx);
+extern bool neg_simm5_p (rtx);
+#ifdef RTX_CODE
+extern bool has_vi_variant_p (rtx_code, rtx);
+#endif
}
/* We classify builtin types into two classes:
@@ -78,9 +78,9 @@ public:
add_input_operand (tail_policy_rtx, Pmode);
add_input_operand (mask_policy_rtx, Pmode);
}
- void add_avl_type_operand ()
+ void add_avl_type_operand (avl_type type)
{
- add_input_operand (get_avl_type_rtx (avl_type::VLMAX), Pmode);
+ add_input_operand (gen_int_mode (type, Pmode), Pmode);
}
void expand (enum insn_code icode, bool temporary_volatile_p = false)
@@ -165,29 +165,56 @@ calculate_ratio (unsigned int sew, enum vlmul_type vlmul)
}
/* Emit an RVV unmask && vl mov from SRC to DEST. */
-void
-emit_pred_op (unsigned icode, rtx dest, rtx src, machine_mode mask_mode)
+static void
+emit_pred_op (unsigned icode, rtx mask, rtx dest, rtx src, rtx len,
+ machine_mode mask_mode)
{
insn_expander<8> e;
machine_mode mode = GET_MODE (dest);
e.add_output_operand (dest, mode);
- e.add_all_one_mask_operand (mask_mode);
+
+ if (mask)
+ e.add_input_operand (mask, GET_MODE (mask));
+ else
+ e.add_all_one_mask_operand (mask_mode);
+
e.add_vundef_operand (mode);
e.add_input_operand (src, GET_MODE (src));
- rtx vlmax = emit_vlmax_vsetvl (mode);
- e.add_input_operand (vlmax, Pmode);
+ if (len)
+ e.add_input_operand (len, Pmode);
+ else
+ {
+ rtx vlmax = emit_vlmax_vsetvl (mode);
+ e.add_input_operand (vlmax, Pmode);
+ }
if (GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL)
e.add_policy_operand (get_prefer_tail_policy (), get_prefer_mask_policy ());
- e.add_avl_type_operand ();
+ if (len)
+ e.add_avl_type_operand (avl_type::NONVLMAX);
+ else
+ e.add_avl_type_operand (avl_type::VLMAX);
e.expand ((enum insn_code) icode, MEM_P (dest) || MEM_P (src));
}
+void
+emit_vlmax_op (unsigned icode, rtx dest, rtx src, machine_mode mask_mode)
+{
+ emit_pred_op (icode, NULL_RTX, dest, src, NULL_RTX, mask_mode);
+}
+
+void
+emit_nonvlmax_op (unsigned icode, rtx dest, rtx src, rtx len,
+ machine_mode mask_mode)
+{
+ emit_pred_op (icode, NULL_RTX, dest, src, len, mask_mode);
+}
+
static void
expand_const_vector (rtx target, rtx src, machine_mode mask_mode)
{
@@ -199,7 +226,7 @@ expand_const_vector (rtx target, rtx src, machine_mode mask_mode)
gcc_assert (
const_vec_duplicate_p (src, &elt)
&& (rtx_equal_p (elt, const0_rtx) || rtx_equal_p (elt, const1_rtx)));
- emit_pred_op (code_for_pred_mov (mode), target, src, mode);
+ emit_vlmax_op (code_for_pred_mov (mode), target, src, mask_mode);
return;
}
@@ -210,10 +237,10 @@ expand_const_vector (rtx target, rtx src, machine_mode mask_mode)
/* Element in range -16 ~ 15 integer or 0.0 floating-point,
we use vmv.v.i instruction. */
if (satisfies_constraint_vi (src) || satisfies_constraint_Wc0 (src))
- emit_pred_op (code_for_pred_mov (mode), tmp, src, mask_mode);
+ emit_vlmax_op (code_for_pred_mov (mode), tmp, src, mask_mode);
else
- emit_pred_op (code_for_pred_broadcast (mode), tmp,
- force_reg (elt_mode, elt), mask_mode);
+ emit_vlmax_op (code_for_pred_broadcast (mode), tmp,
+ force_reg (elt_mode, elt), mask_mode);
if (tmp != target)
emit_move_insn (target, tmp);
@@ -252,12 +279,12 @@ legitimize_move (rtx dest, rtx src, machine_mode mask_mode)
{
rtx tmp = gen_reg_rtx (mode);
if (MEM_P (src))
- emit_pred_op (code_for_pred_mov (mode), tmp, src, mask_mode);
+ emit_vlmax_op (code_for_pred_mov (mode), tmp, src, mask_mode);
else
emit_move_insn (tmp, src);
src = tmp;
}
- emit_pred_op (code_for_pred_mov (mode), dest, src, mask_mode);
+ emit_vlmax_op (code_for_pred_mov (mode), dest, src, mask_mode);
return true;
}
@@ -371,4 +398,39 @@ get_vector_mode (scalar_mode inner_mode, poly_uint64 nunits)
return opt_machine_mode ();
}
+/* Helper functions for handling sew=64 on RV32 system. */
+bool
+simm32_p (rtx x)
+{
+ if (!CONST_INT_P (x))
+ return false;
+ unsigned HOST_WIDE_INT val = UINTVAL (x);
+ return val <= 0x7FFFFFFFULL || val >= 0xFFFFFFFF80000000ULL;
+}
+
+static bool
+simm5_p (rtx x)
+{
+ if (!CONST_INT_P (x))
+ return false;
+ return IN_RANGE (INTVAL (x), -16, 15);
+}
+
+bool
+neg_simm5_p (rtx x)
+{
+ if (!CONST_INT_P (x))
+ return false;
+ return IN_RANGE (INTVAL (x), -15, 16);
+}
+
+bool
+has_vi_variant_p (rtx_code code, rtx x)
+{
+ if (code != PLUS && code != MINUS && code != AND && code != IOR
+ && code != XOR)
+ return false;
+ return simm5_p (x);
+}
+
} // namespace riscv_vector
@@ -155,7 +155,7 @@ public:
};
/* Implements
- * vadd/vsub/vrsub/vand/vor/vxor/vsll/vsra/vsrl/vmin/vmax/vminu/vmaxu/vdiv/vrem/vdivu/vremu/vsadd/vsaddu/vssub/vssubu.
+ * vadd/vsub/vand/vor/vxor/vsll/vsra/vsrl/vmin/vmax/vminu/vmaxu/vdiv/vrem/vdivu/vremu/vsadd/vsaddu/vssub/vssubu.
*/
template<rtx_code CODE>
class binop : public function_base
@@ -175,6 +175,17 @@ public:
}
};
+/* Implements vrsub. */
+class vrsub : public function_base
+{
+public:
+ rtx expand (function_expander &e) const override
+ {
+ return e.use_exact_insn (
+ code_for_pred_sub_reverse_scalar (e.vector_mode ()));
+ }
+};
+
static CONSTEXPR const vsetvl<false> vsetvl_obj;
static CONSTEXPR const vsetvl<true> vsetvlmax_obj;
static CONSTEXPR const loadstore<false, LST_UNIT_STRIDE, false> vle_obj;
@@ -201,7 +212,7 @@ static CONSTEXPR const loadstore<true, LST_INDEXED, true> vsoxei32_obj;
static CONSTEXPR const loadstore<true, LST_INDEXED, true> vsoxei64_obj;
static CONSTEXPR const binop<PLUS> vadd_obj;
static CONSTEXPR const binop<MINUS> vsub_obj;
-static CONSTEXPR const binop<MINUS> vrsub_obj;
+static CONSTEXPR const vrsub vrsub_obj;
static CONSTEXPR const binop<AND> vand_obj;
static CONSTEXPR const binop<IOR> vor_obj;
static CONSTEXPR const binop<XOR> vxor_obj;
@@ -249,6 +260,7 @@ BASE (vsoxei32)
BASE (vsoxei64)
BASE (vadd)
BASE (vsub)
+BASE (vrsub)
BASE (vand)
BASE (vor)
BASE (vxor)
@@ -50,6 +50,7 @@ extern const function_base *const vsoxei32;
extern const function_base *const vsoxei64;
extern const function_base *const vadd;
extern const function_base *const vsub;
+extern const function_base *const vrsub;
extern const function_base *const vand;
extern const function_base *const vor;
extern const function_base *const vxor;
@@ -71,17 +71,32 @@ DEF_RVV_FUNCTION (vxor, binop, full_preds, iu_vvv_ops)
DEF_RVV_FUNCTION (vsll, binop, full_preds, iu_shift_vvv_ops)
DEF_RVV_FUNCTION (vsra, binop, full_preds, iu_shift_vvv_ops)
DEF_RVV_FUNCTION (vsrl, binop, full_preds, iu_shift_vvv_ops)
-DEF_RVV_FUNCTION (vmin, binop, full_preds, iu_vvv_ops)
-DEF_RVV_FUNCTION (vmax, binop, full_preds, iu_vvv_ops)
-DEF_RVV_FUNCTION (vminu, binop, full_preds, iu_vvv_ops)
-DEF_RVV_FUNCTION (vmaxu, binop, full_preds, iu_vvv_ops)
+DEF_RVV_FUNCTION (vmin, binop, full_preds, i_vvv_ops)
+DEF_RVV_FUNCTION (vmax, binop, full_preds, i_vvv_ops)
+DEF_RVV_FUNCTION (vminu, binop, full_preds, u_vvv_ops)
+DEF_RVV_FUNCTION (vmaxu, binop, full_preds, u_vvv_ops)
DEF_RVV_FUNCTION (vmul, binop, full_preds, iu_vvv_ops)
-DEF_RVV_FUNCTION (vdiv, binop, full_preds, iu_vvv_ops)
-DEF_RVV_FUNCTION (vrem, binop, full_preds, iu_vvv_ops)
-DEF_RVV_FUNCTION (vdivu, binop, full_preds, iu_vvv_ops)
-DEF_RVV_FUNCTION (vremu, binop, full_preds, iu_vvv_ops)
+DEF_RVV_FUNCTION (vdiv, binop, full_preds, i_vvv_ops)
+DEF_RVV_FUNCTION (vrem, binop, full_preds, i_vvv_ops)
+DEF_RVV_FUNCTION (vdivu, binop, full_preds, u_vvv_ops)
+DEF_RVV_FUNCTION (vremu, binop, full_preds, u_vvv_ops)
+DEF_RVV_FUNCTION (vadd, binop, full_preds, iu_vvx_ops)
+DEF_RVV_FUNCTION (vsub, binop, full_preds, iu_vvx_ops)
+DEF_RVV_FUNCTION (vrsub, binop, full_preds, iu_vvx_ops)
+DEF_RVV_FUNCTION (vand, binop, full_preds, iu_vvx_ops)
+DEF_RVV_FUNCTION (vor, binop, full_preds, iu_vvx_ops)
+DEF_RVV_FUNCTION (vxor, binop, full_preds, iu_vvx_ops)
DEF_RVV_FUNCTION (vsll, binop, full_preds, iu_shift_vvx_ops)
DEF_RVV_FUNCTION (vsra, binop, full_preds, iu_shift_vvx_ops)
DEF_RVV_FUNCTION (vsrl, binop, full_preds, iu_shift_vvx_ops)
+DEF_RVV_FUNCTION (vmin, binop, full_preds, i_vvx_ops)
+DEF_RVV_FUNCTION (vmax, binop, full_preds, i_vvx_ops)
+DEF_RVV_FUNCTION (vminu, binop, full_preds, u_vvx_ops)
+DEF_RVV_FUNCTION (vmaxu, binop, full_preds, u_vvx_ops)
+DEF_RVV_FUNCTION (vmul, binop, full_preds, iu_vvx_ops)
+DEF_RVV_FUNCTION (vdiv, binop, full_preds, i_vvx_ops)
+DEF_RVV_FUNCTION (vrem, binop, full_preds, i_vvx_ops)
+DEF_RVV_FUNCTION (vdivu, binop, full_preds, u_vvx_ops)
+DEF_RVV_FUNCTION (vremu, binop, full_preds, u_vvx_ops)
#undef DEF_RVV_FUNCTION
@@ -133,6 +133,12 @@ static const rvv_type_info i_ops[] = {
#include "riscv-vector-builtins-types.def"
{NUM_VECTOR_TYPES, 0}};
+/* A list of all unsigned integer will be registered for intrinsic functions. */
+static const rvv_type_info u_ops[] = {
+#define DEF_RVV_U_OPS(TYPE, REQUIRE) {VECTOR_TYPE_##TYPE, REQUIRE},
+#include "riscv-vector-builtins-types.def"
+ {NUM_VECTOR_TYPES, 0}};
+
/* A list of all integer will be registered for intrinsic functions. */
static const rvv_type_info iu_ops[] = {
#define DEF_RVV_I_OPS(TYPE, REQUIRE) {VECTOR_TYPE_##TYPE, REQUIRE},
@@ -244,6 +250,11 @@ static CONSTEXPR const rvv_arg_type_info vv_args[]
= {rvv_arg_type_info (RVV_BASE_vector), rvv_arg_type_info (RVV_BASE_vector),
rvv_arg_type_info_end};
+/* A list of args for vector_type func (vector_type, scalar_type) function. */
+static CONSTEXPR const rvv_arg_type_info vx_args[]
+ = {rvv_arg_type_info (RVV_BASE_vector), rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info_end};
+
/* A list of args for vector_type func (vector_type, shift_type) function. */
static CONSTEXPR const rvv_arg_type_info shift_vv_args[]
= {rvv_arg_type_info (RVV_BASE_vector),
@@ -402,6 +413,46 @@ static CONSTEXPR const rvv_op_info iu_vvv_ops
rvv_arg_type_info (RVV_BASE_vector), /* Return type */
vv_args /* Args */};
+/* A static operand information for vector_type func (vector_type, vector_type)
+ * function registration. */
+static CONSTEXPR const rvv_op_info i_vvv_ops
+ = {i_ops, /* Types */
+ OP_TYPE_vv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ vv_args /* Args */};
+
+/* A static operand information for vector_type func (vector_type, vector_type)
+ * function registration. */
+static CONSTEXPR const rvv_op_info u_vvv_ops
+ = {u_ops, /* Types */
+ OP_TYPE_vv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ vv_args /* Args */};
+
+/* A static operand information for vector_type func (vector_type, scalar_type)
+ * function registration. */
+static CONSTEXPR const rvv_op_info iu_vvx_ops
+ = {iu_ops, /* Types */
+ OP_TYPE_vx, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ vx_args /* Args */};
+
+/* A static operand information for vector_type func (vector_type, scalar_type)
+ * function registration. */
+static CONSTEXPR const rvv_op_info i_vvx_ops
+ = {i_ops, /* Types */
+ OP_TYPE_vx, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ vx_args /* Args */};
+
+/* A static operand information for vector_type func (vector_type, scalar_type)
+ * function registration. */
+static CONSTEXPR const rvv_op_info u_vvx_ops
+ = {u_ops, /* Types */
+ OP_TYPE_vx, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ vx_args /* Args */};
+
/* A static operand information for vector_type func (vector_type, shift_type)
* function registration. */
static CONSTEXPR const rvv_op_info iu_shift_vvv_ops
@@ -312,7 +312,7 @@ ASM_MISA_SPEC
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
/* Others. */ \
- 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
/* Vector registers. */ \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
@@ -55,6 +55,17 @@
(VNx4DI "TARGET_MIN_VLEN > 32") (VNx8DI "TARGET_MIN_VLEN > 32")
])
+(define_mode_iterator VI_QHS [
+ VNx1QI VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32")
+ VNx1HI VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32")
+ VNx1SI VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32")
+])
+
+(define_mode_iterator VI_D [
+ (VNx1DI "TARGET_MIN_VLEN > 32") (VNx2DI "TARGET_MIN_VLEN > 32")
+ (VNx4DI "TARGET_MIN_VLEN > 32") (VNx8DI "TARGET_MIN_VLEN > 32")
+])
+
(define_mode_iterator VNX1_QHSD [
VNx1QI VNx1HI VNx1SI
(VNx1DI "TARGET_MIN_VLEN > 32")
@@ -171,6 +182,14 @@
(VNx1DF "DF") (VNx2DF "DF") (VNx4DF "DF") (VNx8DF "DF")
])
+(define_mode_attr VSUBEL [
+ (VNx1HI "QI") (VNx2HI "QI") (VNx4HI "QI") (VNx8HI "QI") (VNx16HI "QI") (VNx32HI "QI")
+ (VNx1SI "HI") (VNx2SI "HI") (VNx4SI "HI") (VNx8SI "HI") (VNx16SI "HI")
+ (VNx1DI "SI") (VNx2DI "SI") (VNx4DI "SI") (VNx8DI "SI")
+ (VNx1SF "HF") (VNx2SF "HF") (VNx4SF "HF") (VNx8SF "HF") (VNx16SF "HF")
+ (VNx1DF "SF") (VNx2DF "SF") (VNx4DF "SF") (VNx8DF "SF")
+])
+
(define_mode_attr sew [
(VNx1QI "8") (VNx2QI "8") (VNx4QI "8") (VNx8QI "8") (VNx16QI "8") (VNx32QI "8") (VNx64QI "8")
(VNx1HI "16") (VNx2HI "16") (VNx4HI "16") (VNx8HI "16") (VNx16HI "16") (VNx32HI "16")
@@ -190,6 +209,12 @@
smax umax smin umin mult div udiv mod umod
])
+(define_code_iterator any_commutative_binop [plus and ior xor
+ smax umax smin umin mult
+])
+
+(define_code_iterator any_non_commutative_binop [minus div udiv mod umod])
+
(define_code_attr binop_rhs1_predicate [
(plus "register_operand")
(minus "vector_arith_operand")
@@ -294,9 +319,9 @@
(mod "rem.vv")
(udiv "divu.vv")
(umod "remu.vv")
- (ior "or.vv")
- (xor "xor.vv")
- (and "and.vv")
+ (ior "or.vi")
+ (xor "xor.vi")
+ (and "and.vi")
(plus "add.vi")
(minus "add.vi")
(smin "min.vv")
@@ -332,9 +357,9 @@
(mod "%3,%4")
(udiv "%3,%4")
(umod "%3,%4")
- (ior "%3,%4")
- (xor "%3,%4")
- (and "%3,%4")
+ (ior "%3,%v4")
+ (xor "%3,%v4")
+ (and "%3,%v4")
(plus "%3,%v4")
(minus "%3,%V4")
(smin "%3,%4")
@@ -312,7 +312,7 @@
(define_expand "mov<mode>"
[(set (match_operand:V 0 "reg_or_mem_operand")
- (match_operand:V 1 "vector_move_operand"))]
+ (match_operand:V 1 "general_operand"))]
"TARGET_VECTOR"
{
/* For whole register move, we transform the pattern into the format
@@ -398,7 +398,7 @@
(define_expand "mov<mode>"
[(set (match_operand:VB 0 "reg_or_mem_operand")
- (match_operand:VB 1 "vector_move_operand"))]
+ (match_operand:VB 1 "general_operand"))]
"TARGET_VECTOR"
{
if (riscv_vector::legitimize_move (operands[0], operands[1], <MODE>mode))
@@ -427,8 +427,8 @@
(match_operand:<VEL> 1 "direct_broadcast_operand")))]
"TARGET_VECTOR"
{
- riscv_vector::emit_pred_op (
- code_for_pred_broadcast (<MODE>mode), operands[0], operands[1], <VM>mode);
+ riscv_vector::emit_vlmax_op (code_for_pred_broadcast (<MODE>mode),
+ operands[0], operands[1], <VM>mode);
DONE;
}
)
@@ -775,7 +775,15 @@
;; - 13.16 Vector Floating-Point Move Instruction (vfmv.v.f)
;; -------------------------------------------------------------------------------
-(define_insn "@pred_broadcast<mode>"
+;; According to the RVV ISA, a vector-scalar instruction doesn't support
+;; an operand fetched from 2 consecutive registers, so we should use
+;; vlse.v, which is a memory access, to broadcast a DImode scalar into a vector.
+;;
+;; Since the optimization flow in GCC is as follows:
+;; expand --> LICM (loop invariant) --> split.
+;; To benefit from LICM, we postpone generation of vlse.v to the split stage,
+;; since a memory access instruction cannot be optimized by LICM (loop invariant).
+(define_insn_and_split "@pred_broadcast<mode>"
[(set (match_operand:V 0 "register_operand" "=vr, vr, vr, vr")
(if_then_else:V
(unspec:<VM>
@@ -787,14 +795,31 @@
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(vec_duplicate:V
- (match_operand:<VEL> 3 "direct_broadcast_operand" " r, f, Wdm, Wdm"))
- (match_operand:V 2 "vector_merge_operand" "vu0, vu0, vu0, vu0")))]
+ (match_operand:<VEL> 3 "direct_broadcast_operand" "Wbr, f, Wdm, Wdm"))
+ (match_operand:V 2 "vector_merge_operand" "0vu, 0vu, 0vu, 0vu")))]
"TARGET_VECTOR"
"@
vmv.v.x\t%0,%3
vfmv.v.f\t%0,%3
vlse<sew>.v\t%0,%3,zero,%1.t
vlse<sew>.v\t%0,%3,zero"
+ "!FLOAT_MODE_P (<MODE>mode) && register_operand (operands[3], <VEL>mode)
+ && GET_MODE_BITSIZE (<VEL>mode) > GET_MODE_BITSIZE (Pmode)"
+ [(set (match_dup 0)
+ (if_then_else:V (unspec:<VM> [(match_dup 1) (match_dup 4)
+ (match_dup 5) (match_dup 6) (match_dup 7)
+ (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (vec_duplicate:V (match_dup 3))
+ (match_dup 2)))]
+ {
+ gcc_assert (can_create_pseudo_p ());
+ rtx m = assign_stack_local (<VEL>mode, GET_MODE_SIZE (<VEL>mode),
+ GET_MODE_ALIGNMENT (<VEL>mode));
+ m = validize_mem (m);
+ emit_move_insn (m, operands[3]);
+ m = gen_rtx_MEM (<VEL>mode, force_reg (Pmode, XEXP (m, 0)));
+ operands[3] = m;
+ }
[(set_attr "type" "vimov,vfmov,vlds,vlds")
(set_attr "mode" "<MODE>")])
@@ -1179,3 +1204,330 @@
v<insn>.vi\t%0,%3,%4%p1"
[(set_attr "type" "vshift")
(set_attr "mode" "<MODE>")])
+
+;; Handle GET_MODE_INNER (mode) = QImode, HImode, SImode.
+(define_insn "@pred_<optab><mode>_scalar"
+ [(set (match_operand:VI_QHS 0 "register_operand" "=vd, vr")
+ (if_then_else:VI_QHS
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm,Wc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (any_commutative_binop:VI_QHS
+ (vec_duplicate:VI_QHS
+ (match_operand:<VEL> 4 "register_operand" " r, r"))
+ (match_operand:VI_QHS 3 "register_operand" " vr, vr"))
+ (match_operand:VI_QHS 2 "vector_merge_operand" "0vu,0vu")))]
+ "TARGET_VECTOR"
+ "v<insn>.vx\t%0,%3,%4%p1"
+ [(set_attr "type" "<int_binop_insn_type>")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@pred_<optab><mode>_scalar"
+ [(set (match_operand:VI_QHS 0 "register_operand" "=vd, vr")
+ (if_then_else:VI_QHS
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm,Wc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (any_non_commutative_binop:VI_QHS
+ (match_operand:VI_QHS 3 "register_operand" " vr, vr")
+ (vec_duplicate:VI_QHS
+ (match_operand:<VEL> 4 "register_operand" " r, r")))
+ (match_operand:VI_QHS 2 "vector_merge_operand" "0vu,0vu")))]
+ "TARGET_VECTOR"
+ "v<insn>.vx\t%0,%3,%4%p1"
+ [(set_attr "type" "<int_binop_insn_type>")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@pred_sub<mode>_reverse_scalar"
+ [(set (match_operand:VI_QHS 0 "register_operand" "=vd, vr")
+ (if_then_else:VI_QHS
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm,Wc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (minus:VI_QHS
+ (vec_duplicate:VI_QHS
+ (match_operand:<VEL> 4 "register_operand" " r, r"))
+ (match_operand:VI_QHS 3 "register_operand" " vr, vr"))
+ (match_operand:VI_QHS 2 "vector_merge_operand" "0vu,0vu")))]
+ "TARGET_VECTOR"
+ "vrsub.vx\t%0,%3,%4%p1"
+ [(set_attr "type" "vialu")
+ (set_attr "mode" "<MODE>")])
+
+;; Handle GET_MODE_INNER (mode) = DImode. We need a separate expander since
+;; we need to deal with SEW = 64 on RV32 systems.
+(define_expand "@pred_<optab><mode>_scalar"
+ [(set (match_operand:VI_D 0 "register_operand")
+ (if_then_else:VI_D
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand")
+ (match_operand 5 "vector_length_operand")
+ (match_operand 6 "const_int_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (any_commutative_binop:VI_D
+ (vec_duplicate:VI_D
+ (match_operand:<VEL> 4 "reg_or_int_operand"))
+ (match_operand:VI_D 3 "register_operand"))
+ (match_operand:VI_D 2 "vector_merge_operand")))]
+ "TARGET_VECTOR"
+ {
+ if (riscv_vector::has_vi_variant_p (<CODE>, operands[4]))
+ operands[4] = force_reg (<VEL>mode, operands[4]);
+ else if (!TARGET_64BIT)
+ {
+ rtx v = gen_reg_rtx (<MODE>mode);
+
+ if (riscv_vector::simm32_p (operands[4]))
+ operands[4] = gen_rtx_SIGN_EXTEND (<VEL>mode,
+ force_reg (Pmode, operands[4]));
+ else
+ {
+ if (CONST_INT_P (operands[4]))
+ operands[4] = force_reg (<VEL>mode, operands[4]);
+
+ riscv_vector::emit_nonvlmax_op (code_for_pred_broadcast (<MODE>mode),
+ v, operands[4], operands[5], <VM>mode);
+ emit_insn (gen_pred_<optab><mode> (operands[0], operands[1],
+ operands[2], operands[3], v, operands[5],
+ operands[6], operands[7], operands[8]));
+ DONE;
+ }
+ }
+ else
+ operands[4] = force_reg (<VEL>mode, operands[4]);
+ })
+
+(define_insn "*pred_<optab><mode>_scalar"
+ [(set (match_operand:VI_D 0 "register_operand" "=vd, vr")
+ (if_then_else:VI_D
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm,Wc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (any_commutative_binop:VI_D
+ (vec_duplicate:VI_D
+ (match_operand:<VEL> 4 "register_operand" " r, r"))
+ (match_operand:VI_D 3 "register_operand" " vr, vr"))
+ (match_operand:VI_D 2 "vector_merge_operand" "0vu,0vu")))]
+ "TARGET_VECTOR"
+ "v<insn>.vx\t%0,%3,%4%p1"
+ [(set_attr "type" "<int_binop_insn_type>")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*pred_<optab><mode>_extended_scalar"
+ [(set (match_operand:VI_D 0 "register_operand" "=vd, vr")
+ (if_then_else:VI_D
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm,Wc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (any_commutative_binop:VI_D
+ (vec_duplicate:VI_D
+ (sign_extend:<VEL>
+ (match_operand:<VSUBEL> 4 "register_operand" " r, r")))
+ (match_operand:VI_D 3 "register_operand" " vr, vr"))
+ (match_operand:VI_D 2 "vector_merge_operand" "0vu,0vu")))]
+ "TARGET_VECTOR"
+ "v<insn>.vx\t%0,%3,%4%p1"
+ [(set_attr "type" "<int_binop_insn_type>")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "@pred_<optab><mode>_scalar"
+ [(set (match_operand:VI_D 0 "register_operand")
+ (if_then_else:VI_D
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand")
+ (match_operand 5 "vector_length_operand")
+ (match_operand 6 "const_int_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (any_non_commutative_binop:VI_D
+ (match_operand:VI_D 3 "register_operand")
+ (vec_duplicate:VI_D
+ (match_operand:<VEL> 4 "reg_or_int_operand")))
+ (match_operand:VI_D 2 "vector_merge_operand")))]
+ "TARGET_VECTOR"
+ {
+ if (riscv_vector::has_vi_variant_p (<CODE>, operands[4]))
+ operands[4] = force_reg (<VEL>mode, operands[4]);
+ else if (!TARGET_64BIT)
+ {
+ rtx v = gen_reg_rtx (<MODE>mode);
+
+ if (riscv_vector::simm32_p (operands[4]))
+ operands[4] = gen_rtx_SIGN_EXTEND (<VEL>mode,
+ force_reg (Pmode, operands[4]));
+ else
+ {
+ if (CONST_INT_P (operands[4]))
+ operands[4] = force_reg (<VEL>mode, operands[4]);
+
+ riscv_vector::emit_nonvlmax_op (code_for_pred_broadcast (<MODE>mode),
+ v, operands[4], operands[5], <VM>mode);
+ emit_insn (gen_pred_<optab><mode> (operands[0], operands[1],
+ operands[2], operands[3], v, operands[5],
+ operands[6], operands[7], operands[8]));
+ DONE;
+ }
+ }
+ else
+ operands[4] = force_reg (<VEL>mode, operands[4]);
+ })
+
+(define_insn "*pred_<optab><mode>_scalar"
+ [(set (match_operand:VI_D 0 "register_operand" "=vd, vr")
+ (if_then_else:VI_D
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm,Wc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (any_non_commutative_binop:VI_D
+ (match_operand:VI_D 3 "register_operand" " vr, vr")
+ (vec_duplicate:VI_D
+ (match_operand:<VEL> 4 "register_operand" " r, r")))
+ (match_operand:VI_D 2 "vector_merge_operand" "0vu,0vu")))]
+ "TARGET_VECTOR"
+ "v<insn>.vx\t%0,%3,%4%p1"
+ [(set_attr "type" "<int_binop_insn_type>")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*pred_<optab><mode>_extended_scalar"
+ [(set (match_operand:VI_D 0 "register_operand" "=vd, vr")
+ (if_then_else:VI_D
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm,Wc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (any_non_commutative_binop:VI_D
+ (match_operand:VI_D 3 "register_operand" " vr, vr")
+ (vec_duplicate:VI_D
+ (sign_extend:<VEL>
+ (match_operand:<VSUBEL> 4 "register_operand" " r, r"))))
+ (match_operand:VI_D 2 "vector_merge_operand" "0vu,0vu")))]
+ "TARGET_VECTOR"
+ "v<insn>.vx\t%0,%3,%4%p1"
+ [(set_attr "type" "<int_binop_insn_type>")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "@pred_sub<mode>_reverse_scalar"
+ [(set (match_operand:VI_D 0 "register_operand")
+ (if_then_else:VI_D
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand")
+ (match_operand 5 "vector_length_operand")
+ (match_operand 6 "const_int_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (minus:VI_D
+ (vec_duplicate:VI_D
+ (match_operand:<VEL> 4 "reg_or_int_operand"))
+ (match_operand:VI_D 3 "register_operand"))
+ (match_operand:VI_D 2 "vector_merge_operand")))]
+ "TARGET_VECTOR"
+ {
+ if (riscv_vector::neg_simm5_p (operands[4]))
+ operands[4] = force_reg (<VEL>mode, operands[4]);
+ else if (!TARGET_64BIT)
+ {
+ rtx v = gen_reg_rtx (<MODE>mode);
+
+ if (riscv_vector::simm32_p (operands[4]))
+ operands[4] = gen_rtx_SIGN_EXTEND (<VEL>mode,
+ force_reg (Pmode, operands[4]));
+ else
+ {
+ if (CONST_INT_P (operands[4]))
+ operands[4] = force_reg (<VEL>mode, operands[4]);
+
+ riscv_vector::emit_nonvlmax_op (code_for_pred_broadcast (<MODE>mode),
+ v, operands[4], operands[5], <VM>mode);
+ emit_insn (gen_pred_sub<mode> (operands[0], operands[1],
+ operands[2], operands[3], v, operands[5],
+ operands[6], operands[7], operands[8]));
+ DONE;
+ }
+ }
+ else
+ operands[4] = force_reg (<VEL>mode, operands[4]);
+ })
+
+(define_insn "*pred_sub<mode>_reverse_scalar"
+ [(set (match_operand:VI_D 0 "register_operand" "=vd, vr")
+ (if_then_else:VI_D
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm,Wc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (minus:VI_D
+ (vec_duplicate:VI_D
+ (match_operand:<VEL> 4 "register_operand" " r, r"))
+ (match_operand:VI_D 3 "register_operand" " vr, vr"))
+ (match_operand:VI_D 2 "vector_merge_operand" "0vu,0vu")))]
+ "TARGET_VECTOR"
+ "vrsub.vx\t%0,%3,%4%p1"
+ [(set_attr "type" "vialu")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*pred_sub<mode>_extended_reverse_scalar"
+ [(set (match_operand:VI_D 0 "register_operand" "=vd, vr")
+ (if_then_else:VI_D
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm,Wc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (minus:VI_D
+ (vec_duplicate:VI_D
+ (sign_extend:<VEL>
+ (match_operand:<VSUBEL> 4 "register_operand" " r, r")))
+ (match_operand:VI_D 3 "register_operand" " vr, vr"))
+ (match_operand:VI_D 2 "vector_merge_operand" "0vu,0vu")))]
+ "TARGET_VECTOR"
+ "vrsub.vx\t%0,%3,%4%p1"
+ [(set_attr "type" "vialu")
+ (set_attr "mode" "<MODE>")])