@@ -103,35 +103,6 @@ subparts_gt (tree type1, tree type2)
return known_gt (n1, n2);
}
-/* Build a constant of type TYPE, made of VALUE's bits replicated
- every WIDTH bits to fit TYPE's precision. */
-static tree
-build_replicated_const (tree type, unsigned int width, HOST_WIDE_INT value)
-{
- int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
- / HOST_BITS_PER_WIDE_INT;
- unsigned HOST_WIDE_INT low, mask;
- HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
- int i;
-
- gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
-
- if (width == HOST_BITS_PER_WIDE_INT)
- low = value;
- else
- {
- mask = ((HOST_WIDE_INT)1 << width) - 1;
- low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
- }
-
- for (i = 0; i < n; i++)
- a[i] = low;
-
- gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
- return wide_int_to_tree
- (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
-}
-
static GTY(()) tree vector_inner_type;
static GTY(()) tree vector_last_type;
static GTY(()) int vector_last_nunits;
@@ -255,8 +226,8 @@ do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
tree low_bits, high_bits, a_low, b_low, result_low, signs;
max = GET_MODE_MASK (TYPE_MODE (inner_type));
- low_bits = build_replicated_const (word_type, width, max >> 1);
- high_bits = build_replicated_const (word_type, width, max & ~(max >> 1));
+ low_bits = build_replicated_int_cst (word_type, width, max >> 1);
+ high_bits = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
@@ -289,8 +260,8 @@ do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
tree low_bits, high_bits, b_low, result_low, signs;
max = GET_MODE_MASK (TYPE_MODE (inner_type));
- low_bits = build_replicated_const (word_type, width, max >> 1);
- high_bits = build_replicated_const (word_type, width, max & ~(max >> 1));
+ low_bits = build_replicated_int_cst (word_type, width, max >> 1);
+ high_bits = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
@@ -6134,7 +6134,6 @@ vectorizable_shift (vec_info *vinfo,
return true;
}
-
/* Function vectorizable_operation.
Check if STMT_INFO performs a binary, unary or ternary operation that can
@@ -6405,20 +6404,6 @@ vectorizable_operation (vec_info *vinfo,
return false;
}
- /* ??? We should instead expand the operations here, instead of
- relying on vector lowering which has this hard cap on the number
- of vector elements below it performs elementwise operations. */
- if (using_emulated_vectors_p
- && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
- && ((BITS_PER_WORD / vector_element_bits (vectype)) < 4
- || maybe_lt (nunits_out, 4U)))
- {
- if (dump_enabled_p ())
- dump_printf (MSG_NOTE, "not using word mode for +- and less than "
- "four vector elements\n");
- return false;
- }
-
int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
internal_fn cond_fn = get_conditional_internal_fn (code);
@@ -6581,7 +6566,96 @@ vectorizable_operation (vec_info *vinfo,
vop1 = ((op_type == binary_op || op_type == ternary_op)
? vec_oprnds1[i] : NULL_TREE);
vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
- if (masked_loop_p && mask_out_inactive)
+ if (using_emulated_vectors_p
+ && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR))
+ {
+ /* Lower the operation. This follows vector lowering. */
+ unsigned int width = vector_element_bits (vectype);
+ tree inner_type = TREE_TYPE (vectype);
+ tree word_type
+ = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode), 1);
+ HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
+ tree low_bits = build_replicated_int_cst (word_type, width, max >> 1);
+ tree high_bits
+ = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
+ tree wvop0 = make_ssa_name (word_type);
+ new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
+ build1 (VIEW_CONVERT_EXPR,
+ word_type, vop0));
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ tree result_low, signs;
+ if (code == PLUS_EXPR || code == MINUS_EXPR)
+ {
+ tree wvop1 = make_ssa_name (word_type);
+ new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
+ build1 (VIEW_CONVERT_EXPR,
+ word_type, vop1));
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ signs = make_ssa_name (word_type);
+ new_stmt = gimple_build_assign (signs,
+ BIT_XOR_EXPR, wvop0, wvop1);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ tree b_low = make_ssa_name (word_type);
+ new_stmt = gimple_build_assign (b_low,
+ BIT_AND_EXPR, wvop1, low_bits);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ tree a_low = make_ssa_name (word_type);
+ if (code == PLUS_EXPR)
+ new_stmt = gimple_build_assign (a_low,
+ BIT_AND_EXPR, wvop0, low_bits);
+ else
+ new_stmt = gimple_build_assign (a_low,
+ BIT_IOR_EXPR, wvop0, high_bits);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ if (code == MINUS_EXPR)
+ {
+ new_stmt = gimple_build_assign (NULL_TREE,
+ BIT_NOT_EXPR, signs);
+ signs = make_ssa_name (word_type);
+ gimple_assign_set_lhs (new_stmt, signs);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ }
+ new_stmt = gimple_build_assign (NULL_TREE,
+ BIT_AND_EXPR, signs, high_bits);
+ signs = make_ssa_name (word_type);
+ gimple_assign_set_lhs (new_stmt, signs);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ result_low = make_ssa_name (word_type);
+ new_stmt = gimple_build_assign (result_low, code, a_low, b_low);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ }
+ else
+ {
+ tree a_low = make_ssa_name (word_type);
+ new_stmt = gimple_build_assign (a_low,
+ BIT_AND_EXPR, wvop0, low_bits);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ signs = make_ssa_name (word_type);
+ new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ new_stmt = gimple_build_assign (NULL_TREE,
+ BIT_AND_EXPR, signs, high_bits);
+ signs = make_ssa_name (word_type);
+ gimple_assign_set_lhs (new_stmt, signs);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ result_low = make_ssa_name (word_type);
+ new_stmt = gimple_build_assign (result_low,
+ MINUS_EXPR, high_bits, a_low);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ }
+ new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low,
+ signs);
+ result_low = make_ssa_name (word_type);
+ gimple_assign_set_lhs (new_stmt, result_low);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
+ build1 (VIEW_CONVERT_EXPR,
+ vectype, result_low));
+ result_low = make_ssa_name (vectype);
+ gimple_assign_set_lhs (new_stmt, result_low);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ }
+ else if (masked_loop_p && mask_out_inactive)
{
tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
vectype, i);
@@ -2667,6 +2667,36 @@ build_zero_cst (tree type)
}
}
+/* Build a constant of integer type TYPE, made of the low WIDTH bits of
+   VALUE replicated every WIDTH bits to fit TYPE's precision.  WIDTH must
+   be nonzero and at most HOST_BITS_PER_WIDE_INT; when it equals
+   HOST_BITS_PER_WIDE_INT all of VALUE is used as the repeated pattern.  */
+
+tree
+build_replicated_int_cst (tree type, unsigned int width, HOST_WIDE_INT value)
+{
+  int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
+    / HOST_BITS_PER_WIDE_INT;
+  unsigned HOST_WIDE_INT low, mask;
+  HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
+
+  gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
+  /* A zero WIDTH would divide by zero below and a WIDTH larger than a
+     HOST_WIDE_INT would be an out-of-range shift.  */
+  gcc_assert (width > 0 && width <= HOST_BITS_PER_WIDE_INT);
+
+  if (width == HOST_BITS_PER_WIDE_INT)
+    low = value;
+  else
+    {
+      /* Shift in the unsigned type: computing the mask in the signed type
+         overflows for WIDTH == HOST_BITS_PER_WIDE_INT - 1.  */
+      mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
+      /* When WIDTH divides HOST_BITS_PER_WIDE_INT, all-ones / MASK has the
+         lowest bit of every WIDTH-bit field set, so the multiplication
+         copies the masked VALUE into each field.  */
+      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
+    }
+
+  /* Every HOST_WIDE_INT element of the constant carries the same pattern.  */
+  for (int i = 0; i < n; i++)
+    a[i] = low;
+
+  gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
+  return wide_int_to_tree
+    (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
+}
+
/* If floating-point type TYPE has an IEEE-style sign bit, return an
unsigned constant in which only the sign bit is set. Return null
otherwise. */
@@ -4685,6 +4685,7 @@ extern tree build_one_cst (tree);
extern tree build_minus_one_cst (tree);
extern tree build_all_ones_cst (tree);
extern tree build_zero_cst (tree);
+extern tree build_replicated_int_cst (tree, unsigned, HOST_WIDE_INT);
extern tree sign_mask_for (tree);
extern tree build_string (unsigned, const char * = NULL);
extern tree build_poly_int_cst (tree, const poly_wide_int_ref &);