@@ -180,3 +180,144 @@
NULL_RTX, <VM>mode);
DONE;
})
+
+;; =========================================================================
+;; == Comparisons and selects
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Compare and select
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
+
+;; Integer (signed) vcond. Don't enforce an immediate range here, since it
+;; depends on the comparison; leave it to riscv_vector::expand_vcond instead.
+(define_expand "vcond<V:mode><VI:mode>"
+ [(set (match_operand:V 0 "register_operand")
+ (if_then_else:V
+ (match_operator 3 "comparison_operator"
+ [(match_operand:VI 4 "register_operand")
+ (match_operand:VI 5 "nonmemory_operand")])
+ (match_operand:V 1 "nonmemory_operand")
+ (match_operand:V 2 "nonmemory_operand")))]
+ "TARGET_VECTOR && known_eq (GET_MODE_NUNITS (<V:MODE>mode),
+ GET_MODE_NUNITS (<VI:MODE>mode))"
+ {
+ riscv_vector::expand_vcond (<VI:MODE>mode, operands);
+ DONE;
+ }
+)
+
+;; Integer (unsigned) vcondu. Don't enforce an immediate range here, since it
+;; depends on the comparison; leave it to riscv_vector::expand_vcond instead.
+(define_expand "vcondu<V:mode><VI:mode>"
+ [(set (match_operand:V 0 "register_operand")
+ (if_then_else:V
+ (match_operator 3 "comparison_operator"
+ [(match_operand:VI 4 "register_operand")
+ (match_operand:VI 5 "nonmemory_operand")])
+ (match_operand:V 1 "nonmemory_operand")
+ (match_operand:V 2 "nonmemory_operand")))]
+ "TARGET_VECTOR && known_eq (GET_MODE_NUNITS (<V:MODE>mode),
+ GET_MODE_NUNITS (<VI:MODE>mode))"
+ {
+ riscv_vector::expand_vcond (<VI:MODE>mode, operands);
+ DONE;
+ }
+)
+
+;; Floating-point vcond. Don't enforce an immediate range here, since it
+;; depends on the comparison; leave it to riscv_vector::expand_vcond instead.
+(define_expand "vcond<V:mode><VF:mode>"
+ [(set (match_operand:V 0 "register_operand")
+ (if_then_else:V
+ (match_operator 3 "comparison_operator"
+ [(match_operand:VF 4 "register_operand")
+ (match_operand:VF 5 "nonmemory_operand")])
+ (match_operand:V 1 "nonmemory_operand")
+ (match_operand:V 2 "nonmemory_operand")))]
+ "TARGET_VECTOR && known_eq (GET_MODE_NUNITS (<V:MODE>mode),
+ GET_MODE_NUNITS (<VF:MODE>mode))"
+ {
+ riscv_vector::expand_vcond (<VF:MODE>mode, operands);
+ DONE;
+ }
+)
+
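+;; As a rough sketch (not literal generated code), a signed vcond such as
+;; r = a < b ? x : y is expected to expand through
+;; riscv_vector::expand_vcond into a compare that writes a mask register,
+;; followed by a mask-driven merge:
+;; vmslt.vv v0, va, vb
+;; vmerge.vvm vr, vy, vx, v0
+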
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Comparisons
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - vms<eq/ne/ltu/lt/leu/le/gtu/gt>.<vv/vx/vi>
+;; - vmf<eq/ne/lt/le/gt/ge>.<vv/vf>
+;; -------------------------------------------------------------------------
+
+;; Signed integer comparisons. Don't enforce an immediate range here, since
+;; it depends on the comparison; leave it to riscv_vector::expand_vec_cmp_int
+;; instead.
+(define_expand "vec_cmp<mode><vm>"
+ [(set (match_operand:<VM> 0 "register_operand")
+ (match_operator:<VM> 1 "comparison_operator"
+ [(match_operand:VI 2 "register_operand")
+ (match_operand:VI 3 "nonmemory_operand")]))]
+ "TARGET_VECTOR"
+ {
+ riscv_vector::expand_vec_cmp_int (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3]);
+ DONE;
+ }
+)
+
+;; Unsigned integer comparisons. Don't enforce an immediate range here, since
+;; it depends on the comparison; leave it to riscv_vector::expand_vec_cmp_int
+;; instead.
+(define_expand "vec_cmpu<mode><vm>"
+ [(set (match_operand:<VM> 0 "register_operand")
+ (match_operator:<VM> 1 "comparison_operator"
+ [(match_operand:VI 2 "register_operand")
+ (match_operand:VI 3 "nonmemory_operand")]))]
+ "TARGET_VECTOR"
+ {
+ riscv_vector::expand_vec_cmp_int (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3]);
+ DONE;
+ }
+)
+
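+;; As an illustrative sketch (not literal generated output), a signed
+;; comparison such as a > 5 is expected to fold the constant into the
+;; instruction as vmsgt.vi, while constants outside the 5-bit signed
+;; immediate range (-16..15) are forced into a register by
+;; riscv_vector::expand_vec_cmp_int and emitted in .vx form.
+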
+;; Floating-point comparisons. Don't enforce an immediate range here, since
+;; it depends on the comparison; leave it to riscv_vector::expand_vec_cmp_float
+;; instead.
+(define_expand "vec_cmp<mode><vm>"
+ [(set (match_operand:<VM> 0 "register_operand")
+ (match_operator:<VM> 1 "comparison_operator"
+ [(match_operand:VF 2 "register_operand")
+ (match_operand:VF 3 "nonmemory_operand")]))]
+ "TARGET_VECTOR"
+ {
+ riscv_vector::expand_vec_cmp_float (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3], false);
+ DONE;
+ }
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Select based on masks
+;; -------------------------------------------------------------------------
+;; Includes merging patterns for:
+;; - vmerge.vv
+;; - vmerge.vx
+;; - vfmerge.vf
+;; -------------------------------------------------------------------------
+
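+;; vcond_mask selects under a precomputed mask: operand 0 = operand 3
+;; (the mask) ? operand 1 : operand 2. A vector "true" value is expected
+;; to use vmerge.vvm; a duplicated scalar uses vmerge.vxm, or vfmerge.vfm
+;; for floating point.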
+(define_expand "vcond_mask_<mode><vm>"
+ [(match_operand:V 0 "register_operand")
+ (match_operand:<VM> 3 "register_operand")
+ (match_operand:V 1 "nonmemory_operand")
+ (match_operand:V 2 "register_operand")]
+ "TARGET_VECTOR"
+ {
+ riscv_vector::emit_merge_op (operands[0], operands[2],
+ operands[1], operands[3]);
+ DONE;
+ }
+)
@@ -201,6 +201,8 @@ bool simm5_p (rtx);
bool neg_simm5_p (rtx);
#ifdef RTX_CODE
bool has_vi_variant_p (rtx_code, rtx);
+void expand_vec_cmp_int (rtx, rtx_code, rtx, rtx);
+bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
#endif
bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode, machine_mode,
bool, void (*)(rtx *, rtx));
@@ -223,6 +225,8 @@ machine_mode preferred_simd_mode (scalar_mode);
opt_machine_mode get_mask_mode (machine_mode);
void expand_vec_series (rtx, rtx, rtx);
void expand_vec_init (rtx, rtx);
+void expand_vcond (machine_mode, rtx *);
+void emit_merge_op (rtx, rtx, rtx, rtx);
/* Rounding mode bitfield for fixed point VXRM. */
enum vxrm_field_enum
{
@@ -77,6 +77,16 @@ public:
create_input_operand (&m_ops[m_opno++], x, mode);
gcc_assert (m_opno <= MAX_OPERANDS);
}
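+ /* Add an operand X that expand_insn must match verbatim (no copying). */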
+ void add_fixed_operand (rtx x)
+ {
+ create_fixed_operand (&m_ops[m_opno++], x);
+ gcc_assert (m_opno <= MAX_OPERANDS);
+ }
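+ /* Add a CONST_INT X as an integer operand, passed by value so the
+ insn's immediate predicate can accept it. */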
+ void add_integer_operand (rtx x)
+ {
+ create_integer_operand (&m_ops[m_opno++], INTVAL (x));
+ gcc_assert (m_opno <= MAX_OPERANDS);
+ }
void add_all_one_mask_operand (machine_mode mode)
{
add_input_operand (CONSTM1_RTX (mode), mode);
@@ -85,11 +95,14 @@ public:
{
add_input_operand (RVV_VUNDEF (mode), mode);
}
- void add_policy_operand (enum tail_policy vta, enum mask_policy vma)
+ void add_policy_operand (enum tail_policy vta)
{
rtx tail_policy_rtx = gen_int_mode (vta, Pmode);
- rtx mask_policy_rtx = gen_int_mode (vma, Pmode);
add_input_operand (tail_policy_rtx, Pmode);
+ }
+ void add_policy_operand (enum mask_policy vma)
+ {
+ rtx mask_policy_rtx = gen_int_mode (vma, Pmode);
add_input_operand (mask_policy_rtx, Pmode);
}
void add_avl_type_operand (avl_type type)
@@ -97,7 +110,8 @@ public:
add_input_operand (gen_int_mode (type, Pmode), Pmode);
}
- void set_dest_and_mask (rtx mask, rtx dest, machine_mode mask_mode)
+ void set_dest_and_mask (rtx mask, rtx dest, rtx maskoff,
+ machine_mode mask_mode)
{
dest_mode = GET_MODE (dest);
has_dest = true;
@@ -109,35 +123,73 @@ public:
else
add_all_one_mask_operand (mask_mode);
- add_vundef_operand (dest_mode);
+ if (maskoff)
+ add_input_operand (maskoff, GET_MODE (maskoff));
+ else
+ add_vundef_operand (dest_mode);
+ }
+
+ bool set_len (rtx len, bool force_vlmax = false)
+ {
+ bool vlmax_p = force_vlmax || !len;
+ gcc_assert (has_dest);
+
+ if (vlmax_p && const_vlmax_p (dest_mode))
+ {
+ /* Optimize VLS-VLMAX code gen: we can use vsetivli instead of
+ vsetvli to obtain the value of vlmax. */
+ poly_uint64 nunits = GET_MODE_NUNITS (dest_mode);
+ len = gen_int_mode (nunits, Pmode);
+ vlmax_p = false; /* It has become NONVLMAX now. */
+ }
+ else if (!len)
+ {
+ len = gen_reg_rtx (Pmode);
+ emit_vlmax_vsetvl (dest_mode, len);
+ }
+
+ add_input_operand (len, Pmode);
+ return vlmax_p;
}
void set_len_and_policy (rtx len, bool force_vlmax = false)
- {
- bool vlmax_p = force_vlmax || !len;
- gcc_assert (has_dest);
+ {
+ bool vlmax_p = set_len (len, force_vlmax);
+ add_avl_type_operand (vlmax_p ? avl_type::VLMAX : avl_type::NONVLMAX);
+ }
- if (vlmax_p && const_vlmax_p (dest_mode))
- {
- /* Optimize VLS-VLMAX code gen, we can use vsetivli instead of the
- vsetvli to obtain the value of vlmax. */
- poly_uint64 nunits = GET_MODE_NUNITS (dest_mode);
- len = gen_int_mode (nunits, Pmode);
- vlmax_p = false; /* It has became NONVLMAX now. */
- }
- else if (!len)
- {
- len = gen_reg_rtx (Pmode);
- emit_vlmax_vsetvl (dest_mode, len);
- }
+ void set_len_and_policy (rtx len, enum tail_policy ta, enum mask_policy ma,
+ bool force_vlmax = false)
+ {
+ bool vlmax_p = set_len (len, force_vlmax);
+ add_policy_operand (ta);
+ add_policy_operand (ma);
+ add_avl_type_operand (vlmax_p ? avl_type::VLMAX : avl_type::NONVLMAX);
+ }
- add_input_operand (len, Pmode);
+ void set_len_and_policy (rtx len, enum tail_policy ta,
+ bool force_vlmax = false)
+ {
+ bool vlmax_p = set_len (len, force_vlmax);
+ add_policy_operand (ta);
+ add_avl_type_operand (vlmax_p ? avl_type::VLMAX : avl_type::NONVLMAX);
+ }
- if (GET_MODE_CLASS (dest_mode) != MODE_VECTOR_BOOL)
- add_policy_operand (get_prefer_tail_policy (), get_prefer_mask_policy ());
+ void set_len_and_policy (rtx len, enum mask_policy ma,
+ bool force_vlmax = false)
+ {
+ bool vlmax_p = set_len (len, force_vlmax);
+ add_policy_operand (ma);
+ add_avl_type_operand (vlmax_p ? avl_type::VLMAX : avl_type::NONVLMAX);
+ }
- add_avl_type_operand (vlmax_p ? avl_type::VLMAX : avl_type::NONVLMAX);
- }
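+ /* Add DEST as the insn output together with an undefined merge
+ (maskedoff) operand. */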
+ void set_dest_merge (rtx dest)
+ {
+ dest_mode = GET_MODE (dest);
+ has_dest = true;
+ add_output_operand (dest, dest_mode);
+ add_vundef_operand (dest_mode);
+ }
void expand (enum insn_code icode, bool temporary_volatile_p = false)
{
@@ -150,6 +202,8 @@ public:
expand_insn (icode, m_opno, m_ops);
}
+ int opno (void) { return m_opno; }
+
private:
int m_opno;
bool has_dest;
@@ -252,11 +306,14 @@ emit_pred_op (unsigned icode, rtx mask, rtx dest, rtx src, rtx len,
machine_mode mask_mode, bool force_vlmax = false)
{
insn_expander<8> e;
- e.set_dest_and_mask (mask, dest, mask_mode);
+ e.set_dest_and_mask (mask, dest, NULL_RTX, mask_mode);
e.add_input_operand (src, GET_MODE (src));
- e.set_len_and_policy (len, force_vlmax);
+ if (GET_MODE_CLASS (GET_MODE (dest)) == MODE_VECTOR_BOOL)
+ e.set_len_and_policy (len, force_vlmax);
+ else
+ e.set_len_and_policy (len, TAIL_ANY, MASK_ANY, force_vlmax);
e.expand ((enum insn_code) icode, MEM_P (dest) || MEM_P (src));
}
@@ -265,11 +322,11 @@ emit_pred_op (unsigned icode, rtx mask, rtx dest, rtx src, rtx len,
specified using SCALAR_MODE. */
static void
emit_pred_binop (unsigned icode, rtx mask, rtx dest, rtx src1, rtx src2,
- rtx len, machine_mode mask_mode,
- machine_mode scalar_mode = VOIDmode)
+ rtx len, enum tail_policy ta, enum mask_policy ma,
+ machine_mode mask_mode, machine_mode scalar_mode = VOIDmode)
{
insn_expander<9> e;
- e.set_dest_and_mask (mask, dest, mask_mode);
+ e.set_dest_and_mask (mask, dest, NULL_RTX, mask_mode);
gcc_assert (VECTOR_MODE_P (GET_MODE (src1))
|| VECTOR_MODE_P (GET_MODE (src2)));
@@ -284,9 +341,32 @@ emit_pred_binop (unsigned icode, rtx mask, rtx dest, rtx src1, rtx src2,
else
e.add_input_operand (src2, scalar_mode);
- e.set_len_and_policy (len);
+ /* BOOL arithmetic operations do not depend on policies. */
+ if (GET_MODE_CLASS (GET_MODE (src1)) == MODE_VECTOR_BOOL)
+ e.set_len_and_policy (len);
+ else
+ e.set_len_and_policy (len, ta, ma);
+
+ e.expand ((enum insn_code) icode,
+ MEM_P (dest) || MEM_P (src1) || MEM_P (src2));
+}
- e.expand ((enum insn_code) icode, MEM_P (dest) || MEM_P (src1) || MEM_P (src2));
+/* Emit an RVV unop. */
+static void
+emit_pred_unop (unsigned icode, rtx mask, rtx dest, rtx src, rtx len,
+ enum tail_policy ta, enum mask_policy ma,
+ machine_mode mask_mode)
+{
+ insn_expander<9> e;
+ e.set_dest_and_mask (mask, dest, NULL_RTX, mask_mode);
+ gcc_assert (VECTOR_MODE_P (GET_MODE (src)));
+ e.add_input_operand (src, GET_MODE (src));
+ /* BOOL arithmetic operations do not depend on policies. */
+ if (GET_MODE_CLASS (GET_MODE (src)) == MODE_VECTOR_BOOL)
+ e.set_len_and_policy (len);
+ else
+ e.set_len_and_policy (len, ta, ma);
+ e.expand ((enum insn_code) icode, false);
}
/* The RISC-V vsetvli pass uses "known vlmax" operations for optimization.
@@ -336,19 +416,27 @@ void
emit_len_binop (unsigned icode, rtx dest, rtx src1, rtx src2, rtx len,
machine_mode mask_mode, machine_mode scalar_mode)
{
- emit_pred_binop (icode, NULL_RTX, dest, src1, src2, len,
+ emit_pred_binop (icode, NULL_RTX, dest, src1, src2, len, TAIL_ANY, MASK_ANY,
mask_mode, scalar_mode);
}
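+
+/* Emit an RVV unary operation with an all-ones mask and the default
+ TAIL_ANY/MASK_ANY policies. */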
+static void
+emit_len_unop (unsigned icode, rtx dest, rtx src, rtx len,
+ machine_mode mask_mode)
+{
+ emit_pred_unop (icode, NULL_RTX, dest, src, len, TAIL_ANY, MASK_ANY,
+ mask_mode);
+}
+
/* Emit vid.v instruction. */
static void
emit_index_op (rtx dest, machine_mode mask_mode)
{
insn_expander<7> e;
- e.set_dest_and_mask (NULL, dest, mask_mode);
+ e.set_dest_and_mask (NULL, dest, NULL_RTX, mask_mode);
- e.set_len_and_policy (NULL, true);
+ e.set_len_and_policy (NULL, TAIL_ANY, MASK_ANY, true);
e.expand (code_for_pred_series (GET_MODE (dest)), false);
}
@@ -1278,4 +1366,328 @@ expand_vec_init (rtx target, rtx vals)
expand_vector_init_insert_elems (target, v, nelts);
}
+/* Emit a vmerge/vfmerge instruction: DEST[i] = MASK[i] ? SRC2[i] : SRC1[i].
+ SRC2 may be a vector or a scalar element value. */
+
+void
+emit_merge_op (rtx dest, rtx src1, rtx src2, rtx mask)
+{
+ insn_expander<8> e;
+ machine_mode mode = GET_MODE (dest);
+ e.set_dest_merge (dest);
+ e.add_input_operand (src1, mode);
+ if (VECTOR_MODE_P (GET_MODE (src2)))
+ e.add_input_operand (src2, mode);
+ else
+ e.add_input_operand (src2, GET_MODE_INNER (mode));
+
+ e.add_input_operand (mask, GET_MODE (mask));
+ e.set_len_and_policy (NULL_RTX, TAIL_ANY, true);
+ if (VECTOR_MODE_P (GET_MODE (src2)))
+ e.expand (code_for_pred_merge (mode), false);
+ else
+ e.expand (code_for_pred_merge_scalar (mode), false);
+}
+
+/* Expand an RVV vcond pattern with operands OPS. CMP_MODE is the mode of
+ the values being compared; the mode of the data being merged is taken
+ from the operands themselves. */
+
+void
+expand_vcond (machine_mode cmp_mode, rtx *ops)
+{
+ machine_mode mask_mode = get_mask_mode (cmp_mode).require ();
+ rtx mask = gen_reg_rtx (mask_mode);
+ if (FLOAT_MODE_P (cmp_mode))
+ {
+ if (expand_vec_cmp_float (mask, GET_CODE (ops[3]), ops[4], ops[5], true))
+ std::swap (ops[1], ops[2]);
+ }
+ else
+ expand_vec_cmp_int (mask, GET_CODE (ops[3]), ops[4], ops[5]);
+
+ if (CONST_VECTOR_P (ops[1]))
+ {
+ rtx elt;
+ if (const_vec_duplicate_p (ops[1], &elt))
+ ops[1] = elt;
+ }
+ emit_merge_op (ops[0], ops[2], ops[1], mask);
+}
+
+/* Emit an RVV comparison. If SRC2 is a scalar operand, its mode is
+ specified using SCALAR_MODE. */
+static void
+emit_pred_cmp (unsigned icode, rtx_code rcode, rtx mask, rtx dest, rtx maskoff,
+ rtx src1, rtx src2, rtx len, machine_mode mask_mode,
+ machine_mode scalar_mode = VOIDmode)
+{
+ insn_expander<9> e;
+ e.set_dest_and_mask (mask, dest, maskoff, mask_mode);
+ machine_mode data_mode = GET_MODE (src1);
+
+ gcc_assert (VECTOR_MODE_P (GET_MODE (src1))
+ || VECTOR_MODE_P (GET_MODE (src2)));
+
+ if (!insn_operand_matches ((enum insn_code) icode, e.opno () + 1, src1))
+ src1 = force_reg (data_mode, src1);
+ if (!insn_operand_matches ((enum insn_code) icode, e.opno () + 2, src2))
+ {
+ if (VECTOR_MODE_P (GET_MODE (src2)))
+ src2 = force_reg (data_mode, src2);
+ else
+ src2 = force_reg (scalar_mode, src2);
+ }
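+ /* The comparison rtx below is matched by the pattern's match_operator,
+ so it and its operands are added as fixed operands, which expand_insn
+ matches as-is rather than copying into fresh registers; a CONST_INT
+ SRC2 is instead added by value so it can be encoded as a .vi
+ immediate. */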
+ rtx comparison = gen_rtx_fmt_ee (rcode, mask_mode, src1, src2);
+ if (!VECTOR_MODE_P (GET_MODE (src2)))
+ comparison = gen_rtx_fmt_ee (rcode, mask_mode, src1,
+ gen_rtx_VEC_DUPLICATE (data_mode, src2));
+ e.add_fixed_operand (comparison);
+
+ e.add_fixed_operand (src1);
+ if (CONST_INT_P (src2))
+ e.add_integer_operand (src2);
+ else
+ e.add_fixed_operand (src2);
+
+ e.set_len_and_policy (len, maskoff ? MASK_UNDISTURBED : MASK_ANY, true);
+
+ e.expand ((enum insn_code) icode, false);
+}
+
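+/* Emit a comparison with length LEN; a thin wrapper around emit_pred_cmp. */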
+static void
+emit_len_cmp (unsigned icode, rtx_code rcode, rtx mask, rtx dest, rtx maskoff,
+ rtx src1, rtx src2, rtx len, machine_mode mask_mode,
+ machine_mode scalar_mode)
+{
+ emit_pred_cmp (icode, rcode, mask, dest, maskoff, src1, src2, len, mask_mode,
+ scalar_mode);
+}
+
+/* Expand an RVV integer comparison using the RVV equivalent of:
+
+ (set TARGET (CODE OP0 OP1)). */
+
+void
+expand_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1)
+{
+ machine_mode mask_mode = GET_MODE (target);
+ machine_mode data_mode = GET_MODE (op0);
+ insn_code icode;
+ bool scalar_p = false;
+
+ if (CONST_VECTOR_P (op1))
+ {
+ rtx elt;
+ if (const_vec_duplicate_p (op1, &elt))
+ {
+ op1 = elt;
+ scalar_p = true;
+ }
+ }
+
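+ /* Note: RVV has vmsgt{u}.vx/.vi but no vmsge{u} encodings; the
+ pred_ge_scalar pattern used below is assumed to synthesize scalar
+ GE/GEU, e.g. as an LT compare followed by mask negation. */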
+ switch (code)
+ {
+ case LE:
+ case LEU:
+ case GT:
+ case GTU:
+ if (scalar_p)
+ icode = code_for_pred_cmp_scalar (data_mode);
+ else
+ icode = code_for_pred_cmp (data_mode);
+ break;
+ case EQ:
+ case NE:
+ if (scalar_p)
+ icode = code_for_pred_eqne_scalar (data_mode);
+ else
+ icode = code_for_pred_cmp (data_mode);
+ break;
+ case LT:
+ case LTU:
+ if (scalar_p)
+ icode = code_for_pred_cmp_scalar (data_mode);
+ else
+ icode = code_for_pred_ltge (data_mode);
+ break;
+ case GE:
+ case GEU:
+ if (scalar_p)
+ icode = code_for_pred_ge_scalar (data_mode);
+ else
+ icode = code_for_pred_ltge (data_mode);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ emit_len_cmp (icode, code, NULL_RTX, target, NULL_RTX, op0, op1, NULL,
+ mask_mode, GET_MODE_INNER (data_mode));
+}
+
+/* Expand an RVV floating-point comparison, possibly masked by MASK with
+ inactive elements taken from MASKOFF, using the RVV equivalent of:
+
+ (set TARGET (CODE OP0 OP1)). */
+
+static void
+expand_vec_cmp_float (rtx mask, rtx target, rtx maskoff, rtx_code code, rtx op0,
+ rtx op1)
+{
+ machine_mode mask_mode = GET_MODE (target);
+ machine_mode data_mode = GET_MODE (op0);
+ insn_code icode;
+ bool scalar_p = false;
+
+ if (CONST_VECTOR_P (op1))
+ {
+ rtx elt;
+ if (const_vec_duplicate_p (op1, &elt))
+ {
+ op1 = elt;
+ scalar_p = true;
+ }
+ }
+
+ switch (code)
+ {
+ case EQ:
+ case NE:
+ if (scalar_p)
+ icode = code_for_pred_eqne_scalar (data_mode);
+ else
+ icode = code_for_pred_cmp (data_mode);
+ break;
+ case LT:
+ case LE:
+ case GT:
+ case GE:
+ if (scalar_p)
+ icode = code_for_pred_cmp_scalar (data_mode);
+ else
+ icode = code_for_pred_cmp (data_mode);
+ break;
+ case LTGT:
+ {
+ if (scalar_p)
+ icode = code_for_pred_cmp_scalar (data_mode);
+ else
+ icode = code_for_pred_cmp (data_mode);
+ rtx gt = gen_reg_rtx (mask_mode);
+ rtx lt = gen_reg_rtx (mask_mode);
+ emit_len_cmp (icode, GT, mask, gt, maskoff, op0, op1, NULL, mask_mode,
+ GET_MODE_INNER (data_mode));
+ emit_len_cmp (icode, LT, mask, lt, maskoff, op0, op1, NULL, mask_mode,
+ GET_MODE_INNER (data_mode));
+ icode = code_for_pred (IOR, mask_mode);
+ emit_len_binop (icode, target, gt, lt, NULL_RTX, mask_mode, VOIDmode);
+ return;
+ }
+ default:
+ gcc_unreachable ();
+ }
+ emit_len_cmp (icode, code, mask, target, maskoff, op0, op1, NULL, mask_mode,
+ GET_MODE_INNER (data_mode));
+}
+
+/* Expand an RVV floating-point comparison using the RVV equivalent of:
+
+ (set TARGET (CODE OP0 OP1))
+
+ If CAN_INVERT_P is true, the caller can also handle inverted results;
+ return true if the result is in fact inverted. */
+
+bool
+expand_vec_cmp_float (rtx target, rtx_code code, rtx op0, rtx op1,
+ bool can_invert_p)
+{
+ machine_mode mask_mode = GET_MODE (target);
+ machine_mode data_mode = GET_MODE (op0);
+
+ /* If can_invert_p = true:
+ It suffices to implement a u>= b as !(a < b) but with the NaNs masked off:
+
+ vmfeq.vv v0, va, va
+ vmfeq.vv v1, vb, vb
+ vmand.mm v0, v0, v1
+ vmflt.vv v0, va, vb, v0.t
+ vmnot.m v0, v0
+
+ And, if !HONOR_SNANS, then you can remove the vmand.mm by masking the
+ second vmfeq.vv:
+
+ vmfeq.vv v0, va, va
+ vmfeq.vv v0, vb, vb, v0.t
+ vmflt.vv v0, va, vb, v0.t
+ vmnot.m v0, v0
+
+ If can_invert_p = false:
+
+ # Example of implementing isgreater()
+ vmfeq.vv v0, va, va # Only set where A is not NaN.
+ vmfeq.vv v1, vb, vb # Only set where B is not NaN.
+ vmand.mm v0, v0, v1 # Only set where A and B are ordered,
+ vmfgt.vv v0, va, vb, v0.t # so only set flags on ordered values.
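+
+ A further sketch (assumed, not verified output): with !HONOR_SNANS and
+ can_invert_p = false, UNLT lowers to
+
+ vmfeq.vv v0, va, va
+ vmfeq.vv v0, vb, vb, v0.t
+ vmfge.vv v0, va, vb, v0.t # GE is the reverse of UNLT
+ vmnot.m vd, v0 # NaN lanes were 0 and become 1.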
+ */
+
+ rtx eq0 = gen_reg_rtx (mask_mode);
+ rtx eq1 = gen_reg_rtx (mask_mode);
+ switch (code)
+ {
+ case EQ:
+ case NE:
+ case LT:
+ case LE:
+ case GT:
+ case GE:
+ case LTGT:
+ /* There is native support for the comparison. */
+ expand_vec_cmp_float (NULL_RTX, target, NULL_RTX, code, op0, op1);
+ return false;
+ case UNEQ:
+ case ORDERED:
+ case UNORDERED:
+ case UNLT:
+ case UNLE:
+ case UNGT:
+ case UNGE:
+ /* vmfeq.vv v0, va, va */
+ expand_vec_cmp_float (NULL_RTX, eq0, NULL_RTX, EQ, op0, op0);
+ if (HONOR_SNANS (data_mode))
+ {
+ /*
+ vmfeq.vv v1, vb, vb
+ vmand.mm v0, v0, v1
+ */
+ expand_vec_cmp_float (NULL_RTX, eq1, NULL_RTX, EQ, op1, op1);
+ insn_code icode = code_for_pred (AND, mask_mode);
+ emit_len_binop (icode, eq0, eq0, eq1, NULL_RTX, mask_mode, VOIDmode);
+ }
+ else
+ {
+ /* vmfeq.vv v0, vb, vb, v0.t */
+ expand_vec_cmp_float (eq0, eq0, eq0, EQ, op1, op1);
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (code == ORDERED)
+ {
+ emit_move_insn (target, eq0);
+ return false;
+ }
+
+ /* There is native support for the inverse comparison. */
+ code = reverse_condition_maybe_unordered (code);
+ if (code == ORDERED)
+ emit_move_insn (target, eq0);
+ else
+ expand_vec_cmp_float (eq0, eq0, eq0, code, op0, op1);
+
+ if (can_invert_p)
+ {
+ emit_move_insn (target, eq0);
+ return true;
+ }
+ insn_code icode = code_for_pred_not (mask_mode);
+ emit_len_unop (icode, target, eq0, NULL_RTX, mask_mode);
+ return false;
+}
+
} // namespace riscv_vector
new file mode 100644
@@ -0,0 +1,157 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
+
+#include <stdint-gcc.h>
+
+#define DEF_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX) \
+ void __attribute__ ((noinline, noclone)) \
+ vcond_var_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r, \
+ DATA_TYPE *__restrict__ x, \
+ DATA_TYPE *__restrict__ y, \
+ CMP_TYPE *__restrict__ a, \
+ CMP_TYPE *__restrict__ b, \
+ int n) \
+ { \
+ for (int i = 0; i < n; i++) \
+ { \
+ DATA_TYPE xval = x[i], yval = y[i]; \
+ CMP_TYPE aval = a[i], bval = b[i]; \
+ r[i] = aval COND bval ? xval : yval; \
+ } \
+ }
+
+#define DEF_VCOND_IMM(DATA_TYPE, CMP_TYPE, COND, IMM, SUFFIX) \
+ void __attribute__ ((noinline, noclone)) \
+ vcond_imm_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r, \
+ DATA_TYPE *__restrict__ x, \
+ DATA_TYPE *__restrict__ y, \
+ CMP_TYPE *__restrict__ a, \
+ int n) \
+ { \
+ for (int i = 0; i < n; i++) \
+ { \
+ DATA_TYPE xval = x[i], yval = y[i]; \
+ CMP_TYPE aval = a[i]; \
+ r[i] = aval COND (CMP_TYPE) IMM ? xval : yval; \
+ } \
+ }
+
+#define TEST_COND_VAR_SIGNED_ALL(T, COND, SUFFIX) \
+ T (int8_t, int8_t, COND, SUFFIX) \
+ T (int16_t, int16_t, COND, SUFFIX) \
+ T (int32_t, int32_t, COND, SUFFIX) \
+ T (int64_t, int64_t, COND, SUFFIX) \
+ T (float, int32_t, COND, SUFFIX##_float) \
+ T (double, int64_t, COND, SUFFIX##_double)
+
+#define TEST_COND_VAR_UNSIGNED_ALL(T, COND, SUFFIX) \
+ T (uint8_t, uint8_t, COND, SUFFIX) \
+ T (uint16_t, uint16_t, COND, SUFFIX) \
+ T (uint32_t, uint32_t, COND, SUFFIX) \
+ T (uint64_t, uint64_t, COND, SUFFIX) \
+ T (float, uint32_t, COND, SUFFIX##_float) \
+ T (double, uint64_t, COND, SUFFIX##_double)
+
+#define TEST_COND_VAR_ALL(T, COND, SUFFIX) \
+ TEST_COND_VAR_SIGNED_ALL (T, COND, SUFFIX) \
+ TEST_COND_VAR_UNSIGNED_ALL (T, COND, SUFFIX)
+
+#define TEST_VAR_ALL(T) \
+ TEST_COND_VAR_ALL (T, >, _gt) \
+ TEST_COND_VAR_ALL (T, <, _lt) \
+ TEST_COND_VAR_ALL (T, >=, _ge) \
+ TEST_COND_VAR_ALL (T, <=, _le) \
+ TEST_COND_VAR_ALL (T, ==, _eq) \
+ TEST_COND_VAR_ALL (T, !=, _ne)
+
+#define TEST_COND_IMM_SIGNED_ALL(T, COND, IMM, SUFFIX) \
+ T (int8_t, int8_t, COND, IMM, SUFFIX) \
+ T (int16_t, int16_t, COND, IMM, SUFFIX) \
+ T (int32_t, int32_t, COND, IMM, SUFFIX) \
+ T (int64_t, int64_t, COND, IMM, SUFFIX) \
+ T (float, int32_t, COND, IMM, SUFFIX##_float) \
+ T (double, int64_t, COND, IMM, SUFFIX##_double)
+
+#define TEST_COND_IMM_UNSIGNED_ALL(T, COND, IMM, SUFFIX) \
+ T (uint8_t, uint8_t, COND, IMM, SUFFIX) \
+ T (uint16_t, uint16_t, COND, IMM, SUFFIX) \
+ T (uint32_t, uint32_t, COND, IMM, SUFFIX) \
+ T (uint64_t, uint64_t, COND, IMM, SUFFIX) \
+ T (float, uint32_t, COND, IMM, SUFFIX##_float) \
+ T (double, uint64_t, COND, IMM, SUFFIX##_double)
+
+#define TEST_COND_IMM_ALL(T, COND, IMM, SUFFIX) \
+ TEST_COND_IMM_SIGNED_ALL (T, COND, IMM, SUFFIX) \
+ TEST_COND_IMM_UNSIGNED_ALL (T, COND, IMM, SUFFIX)
+
+#define TEST_IMM_ALL(T) \
+ /* Expect immediates to make it into the encoding. */ \
+ TEST_COND_IMM_ALL (T, >, 5, _gt) \
+ TEST_COND_IMM_ALL (T, <, 5, _lt) \
+ TEST_COND_IMM_ALL (T, >=, 5, _ge) \
+ TEST_COND_IMM_ALL (T, <=, 5, _le) \
+ TEST_COND_IMM_ALL (T, ==, 5, _eq) \
+ TEST_COND_IMM_ALL (T, !=, 5, _ne) \
+ \
+ TEST_COND_IMM_SIGNED_ALL (T, >, 15, _gt2) \
+ TEST_COND_IMM_SIGNED_ALL (T, <, 15, _lt2) \
+ TEST_COND_IMM_SIGNED_ALL (T, >=, 15, _ge2) \
+ TEST_COND_IMM_SIGNED_ALL (T, <=, 15, _le2) \
+ TEST_COND_IMM_ALL (T, ==, 15, _eq2) \
+ TEST_COND_IMM_ALL (T, !=, 15, _ne2) \
+ \
+ TEST_COND_IMM_SIGNED_ALL (T, >, 16, _gt3) \
+ TEST_COND_IMM_SIGNED_ALL (T, <, 16, _lt3) \
+ TEST_COND_IMM_SIGNED_ALL (T, >=, 16, _ge3) \
+ TEST_COND_IMM_SIGNED_ALL (T, <=, 16, _le3) \
+ TEST_COND_IMM_ALL (T, ==, 16, _eq3) \
+ TEST_COND_IMM_ALL (T, !=, 16, _ne3) \
+ \
+ TEST_COND_IMM_SIGNED_ALL (T, >, -16, _gt4) \
+ TEST_COND_IMM_SIGNED_ALL (T, <, -16, _lt4) \
+ TEST_COND_IMM_SIGNED_ALL (T, >=, -16, _ge4) \
+ TEST_COND_IMM_SIGNED_ALL (T, <=, -16, _le4) \
+ TEST_COND_IMM_ALL (T, ==, -16, _eq4) \
+ TEST_COND_IMM_ALL (T, !=, -16, _ne4) \
+ \
+ TEST_COND_IMM_SIGNED_ALL (T, >, -17, _gt5) \
+ TEST_COND_IMM_SIGNED_ALL (T, <, -17, _lt5) \
+ TEST_COND_IMM_SIGNED_ALL (T, >=, -17, _ge5) \
+ TEST_COND_IMM_SIGNED_ALL (T, <=, -17, _le5) \
+ TEST_COND_IMM_ALL (T, ==, -17, _eq5) \
+ TEST_COND_IMM_ALL (T, !=, -17, _ne5) \
+ \
+ TEST_COND_IMM_UNSIGNED_ALL (T, >, 0, _gt6) \
+ /* Testing if an unsigned value >= 0 or < 0 is pointless as it will \
+ get folded away by the compiler. */ \
+ TEST_COND_IMM_UNSIGNED_ALL (T, <=, 0, _le6) \
+ \
+ TEST_COND_IMM_UNSIGNED_ALL (T, >, 127, _gt7) \
+ TEST_COND_IMM_UNSIGNED_ALL (T, <, 127, _lt7) \
+ TEST_COND_IMM_UNSIGNED_ALL (T, >=, 127, _ge7) \
+ TEST_COND_IMM_UNSIGNED_ALL (T, <=, 127, _le7) \
+ \
+ /* Expect immediates to NOT make it into the encoding, and instead be \
+ forced into a register. */ \
+ TEST_COND_IMM_UNSIGNED_ALL (T, >, 128, _gt8) \
+ TEST_COND_IMM_UNSIGNED_ALL (T, <, 128, _lt8) \
+ TEST_COND_IMM_UNSIGNED_ALL (T, >=, 128, _ge8) \
+ TEST_COND_IMM_UNSIGNED_ALL (T, <=, 128, _le8)
+
+TEST_VAR_ALL (DEF_VCOND_VAR)
+TEST_IMM_ALL (DEF_VCOND_IMM)
+
+/* { dg-final { scan-assembler-times {\tvmseq\.vi} 42 } } */
+/* { dg-final { scan-assembler-times {\tvmsne\.vi} 42 } } */
+/* { dg-final { scan-assembler-times {\tvmsgt\.vi} 30 } } */
+/* { dg-final { scan-assembler-times {\tvmsgtu\.vi} 12 } } */
+/* { dg-final { scan-assembler-times {\tvmslt\.vi} 8 } } */
+/* { dg-final { scan-assembler-times {\tvmsge\.vi} 8 } } */
+/* { dg-final { scan-assembler-times {\tvmsle\.vi} 30 } } */
+/* { dg-final { scan-assembler-times {\tvmsleu\.vi} 12 } } */
+/* { dg-final { scan-assembler-times {\tvmseq} 78 } } */
+/* { dg-final { scan-assembler-times {\tvmsne} 78 } } */
+/* { dg-final { scan-assembler-times {\tvmsgt} 82 } } */
+/* { dg-final { scan-assembler-times {\tvmslt} 38 } } */
+/* { dg-final { scan-assembler-times {\tvmsge} 38 } } */
+/* { dg-final { scan-assembler-times {\tvmsle} 82 } } */
new file mode 100644
@@ -0,0 +1,75 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
+
+#include <stdint-gcc.h>
+
+#define eq(A, B) ((A) == (B))
+#define ne(A, B) ((A) != (B))
+#define olt(A, B) ((A) < (B))
+#define ole(A, B) ((A) <= (B))
+#define oge(A, B) ((A) >= (B))
+#define ogt(A, B) ((A) > (B))
+#define ordered(A, B) (!__builtin_isunordered (A, B))
+#define unordered(A, B) (__builtin_isunordered (A, B))
+#define ueq(A, B) (!__builtin_islessgreater (A, B))
+#define ult(A, B) (__builtin_isless (A, B))
+#define ule(A, B) (__builtin_islessequal (A, B))
+#define uge(A, B) (__builtin_isgreaterequal (A, B))
+#define ugt(A, B) (__builtin_isgreater (A, B))
+#define nueq(A, B) (__builtin_islessgreater (A, B))
+#define nult(A, B) (!__builtin_isless (A, B))
+#define nule(A, B) (!__builtin_islessequal (A, B))
+#define nuge(A, B) (!__builtin_isgreaterequal (A, B))
+#define nugt(A, B) (!__builtin_isgreater (A, B))
+
+#define TEST_LOOP(TYPE1, TYPE2, CMP) \
+ void __attribute__ ((noinline, noclone)) \
+ test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest, \
+ TYPE1 *restrict src, \
+ TYPE1 fallback, \
+ TYPE2 *restrict a, \
+ TYPE2 *restrict b, \
+ int count) \
+ { \
+ for (int i = 0; i < count; ++i) \
+ {\
+ TYPE2 aval = a[i]; \
+ TYPE2 bval = b[i]; \
+ TYPE1 srcval = src[i]; \
+ dest[i] = CMP (aval, bval) ? srcval : fallback; \
+ }\
+ }
+
+#define TEST_CMP(CMP) \
+ TEST_LOOP (int32_t, float, CMP) \
+ TEST_LOOP (uint32_t, float, CMP) \
+ TEST_LOOP (float, float, CMP) \
+ TEST_LOOP (int64_t, double, CMP) \
+ TEST_LOOP (uint64_t, double, CMP) \
+ TEST_LOOP (double, double, CMP)
+
+TEST_CMP (eq)
+TEST_CMP (ne)
+TEST_CMP (olt)
+TEST_CMP (ole)
+TEST_CMP (oge)
+TEST_CMP (ogt)
+TEST_CMP (ordered)
+TEST_CMP (unordered)
+TEST_CMP (ueq)
+TEST_CMP (ult)
+TEST_CMP (ule)
+TEST_CMP (uge)
+TEST_CMP (ugt)
+TEST_CMP (nueq)
+TEST_CMP (nult)
+TEST_CMP (nule)
+TEST_CMP (nuge)
+TEST_CMP (nugt)
+
+/* { dg-final { scan-assembler-times {\tvmfeq} 150 } } */
+/* { dg-final { scan-assembler-times {\tvmfne} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfgt} 30 } } */
+/* { dg-final { scan-assembler-times {\tvmflt} 30 } } */
+/* { dg-final { scan-assembler-times {\tvmfge} 18 } } */
+/* { dg-final { scan-assembler-times {\tvmfle} 18 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-trapping-math" } */
+
+/* The difference here is that nueq can use LTGT. */
+
+#include "vcond-2.c"
+
+/* { dg-final { scan-assembler-times {\tvmfeq} 90 } } */
+/* { dg-final { scan-assembler-times {\tvmfne} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfgt} 30 } } */
+/* { dg-final { scan-assembler-times {\tvmflt} 30 } } */
+/* { dg-final { scan-assembler-times {\tvmfge} 18 } } */
+/* { dg-final { scan-assembler-times {\tvmfle} 18 } } */
new file mode 100644
@@ -0,0 +1,49 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
+
+#include "vcond-1.c"
+
+#define N 97
+
+#define TEST_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX) \
+{ \
+ DATA_TYPE x[N], y[N], r[N]; \
+ CMP_TYPE a[N], b[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ x[i] = i; \
+ y[i] = (i & 1) + 5; \
+ a[i] = i - N / 3; \
+ b[i] = N - N / 3 - i; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ vcond_var_##CMP_TYPE##_##SUFFIX (r, x, y, a, b, N); \
+ for (int i = 0; i < N; ++i) \
+ if (r[i] != (a[i] COND b[i] ? x[i] : y[i])) \
+ __builtin_abort (); \
+}
+
+#define TEST_VCOND_IMM(DATA_TYPE, CMP_TYPE, COND, IMM, SUFFIX) \
+{ \
+ DATA_TYPE x[N], y[N], r[N]; \
+ CMP_TYPE a[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ x[i] = i; \
+ y[i] = (i & 1) + 5; \
+ a[i] = IMM - N / 3 + i; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ vcond_imm_##CMP_TYPE##_##SUFFIX (r, x, y, a, N); \
+ for (int i = 0; i < N; ++i) \
+ if (r[i] != (a[i] COND (CMP_TYPE) IMM ? x[i] : y[i])) \
+ __builtin_abort (); \
+}
+
+int __attribute__ ((optimize (1)))
+main (int argc, char **argv)
+{
+ TEST_VAR_ALL (TEST_VCOND_VAR)
+ TEST_IMM_ALL (TEST_VCOND_IMM)
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,76 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
+/* { dg-require-effective-target fenv_exceptions } */
+
+#include "vcond-2.c"
+
+#ifndef TEST_EXCEPTIONS
+#define TEST_EXCEPTIONS 1
+#endif
+
+#include <fenv.h>
+
+#define N 401
+
+#define RUN_LOOP(TYPE1, TYPE2, CMP, EXPECT_INVALID) \
+ { \
+ TYPE1 dest[N], src[N]; \
+ TYPE2 a[N], b[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ src[i] = i * i; \
+ if (i % 5 == 0) \
+ a[i] = 0; \
+ else if (i % 3) \
+ a[i] = i * 0.1; \
+ else \
+ a[i] = i; \
+ if (i % 7 == 0) \
+ b[i] = __builtin_nan (""); \
+ else if (i % 6) \
+ b[i] = i * 0.1; \
+ else \
+ b[i] = i; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ feclearexcept (FE_ALL_EXCEPT); \
+ test_##TYPE1##_##TYPE2##_##CMP##_var (dest, src, 11, a, b, N); \
+ if (TEST_EXCEPTIONS \
+ && !fetestexcept (FE_INVALID) != !(EXPECT_INVALID)) \
+ __builtin_abort (); \
+ for (int i = 0; i < N; ++i) \
+ if (dest[i] != (CMP (a[i], b[i]) ? src[i] : 11)) \
+ __builtin_abort (); \
+ }
+
+#define RUN_CMP(CMP, EXPECT_INVALID) \
+ RUN_LOOP (int32_t, float, CMP, EXPECT_INVALID) \
+ RUN_LOOP (uint32_t, float, CMP, EXPECT_INVALID) \
+ RUN_LOOP (float, float, CMP, EXPECT_INVALID) \
+ RUN_LOOP (int64_t, double, CMP, EXPECT_INVALID) \
+ RUN_LOOP (uint64_t, double, CMP, EXPECT_INVALID) \
+ RUN_LOOP (double, double, CMP, EXPECT_INVALID)
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ RUN_CMP (eq, 0)
+ RUN_CMP (ne, 0)
+ RUN_CMP (olt, 1)
+ RUN_CMP (ole, 1)
+ RUN_CMP (oge, 1)
+ RUN_CMP (ogt, 1)
+ RUN_CMP (ordered, 0)
+ RUN_CMP (unordered, 0)
+ RUN_CMP (ueq, 0)
+ RUN_CMP (ult, 0)
+ RUN_CMP (ule, 0)
+ RUN_CMP (uge, 0)
+ RUN_CMP (ugt, 0)
+ RUN_CMP (nueq, 0)
+ RUN_CMP (nult, 0)
+ RUN_CMP (nule, 0)
+ RUN_CMP (nuge, 0)
+ RUN_CMP (nugt, 0)
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,6 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-trapping-math" } */
+/* { dg-require-effective-target fenv_exceptions } */
+
+#define TEST_EXCEPTIONS 0
+#include "vcond_run-2.c"
@@ -63,6 +63,8 @@ foreach op $AUTOVEC_TEST_OPTS {
"" "$op"
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/binop/*.\[cS\]]] \
"" "$op"
+ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/cmp/*.\[cS\]]] \
+ "" "$op"
}
# VLS-VLMAX tests