@@ -11609,30 +11609,6 @@ fold_binary_loc (location_t loc, enum tree_code code, tree type,
}
else
{
- /* Fold z * +-I to __complex__ (-+__imag z, +-__real z).
- This is not the same for NaNs or if signed zeros are
- involved. */
- if (!HONOR_NANS (arg0)
- && !HONOR_SIGNED_ZEROS (arg0)
- && COMPLEX_FLOAT_TYPE_P (TREE_TYPE (arg0))
- && TREE_CODE (arg1) == COMPLEX_CST
- && real_zerop (TREE_REALPART (arg1)))
- {
- tree rtype = TREE_TYPE (TREE_TYPE (arg0));
- if (real_onep (TREE_IMAGPART (arg1)))
- return
- fold_build2_loc (loc, COMPLEX_EXPR, type,
- negate_expr (fold_build1_loc (loc, IMAGPART_EXPR,
- rtype, arg0)),
- fold_build1_loc (loc, REALPART_EXPR, rtype, arg0));
- else if (real_minus_onep (TREE_IMAGPART (arg1)))
- return
- fold_build2_loc (loc, COMPLEX_EXPR, type,
- fold_build1_loc (loc, IMAGPART_EXPR, rtype, arg0),
- negate_expr (fold_build1_loc (loc, REALPART_EXPR,
- rtype, arg0)));
- }
-
/* Optimize z * conj(z) for floating point complex numbers.
Guarded by flag_unsafe_math_optimizations as non-finite
imaginary components don't produce scalar results. */
@@ -11645,6 +11621,97 @@ fold_binary_loc (location_t loc, enum tree_code code, tree type,
&& operand_equal_p (arg0, TREE_OPERAND (arg1, 0), 0))
return fold_mult_zconjz (loc, type, arg0);
}
+
+ /* Fold z * +-I to __complex__ (-+__imag z, +-__real z).
+ This is not the same for NaNs or if signed zeros are
+ involved. */
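+ E.g. (a + b*I) * I is -b + a*I, and (a + b*I) * -I is b - a*I.  */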
+ if (!HONOR_NANS (arg0)
+ && !HONOR_SIGNED_ZEROS (arg0)
+ && TREE_CODE (arg1) == COMPLEX_CST
+ && COMPLEX_FLOAT_TYPE_P (TREE_TYPE (arg0))
+ && real_zerop (TREE_REALPART (arg1)))
+ {
+ if (real_onep (TREE_IMAGPART (arg1)))
+ {
+ tree rtype = TREE_TYPE (TREE_TYPE (arg0));
+ tree cplx_build = fold_build2_loc (loc, COMPLEX_EXPR, type,
+ negate_expr (fold_build1_loc (loc, IMAGPART_EXPR,
+ rtype, arg0)),
+ fold_build1_loc (loc, REALPART_EXPR, rtype, arg0));
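+ /* Use the folded form only when the negation of the imaginary part
+ simplified away; otherwise build one of the rotation forms below.  */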
+ if (cplx_build
+ && (TREE_CODE (cplx_build) != COMPLEX_EXPR
+ || TREE_CODE (TREE_OPERAND (cplx_build, 0)) != NEGATE_EXPR))
+ return cplx_build;
+
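+ /* If z is __complex__ (a, 0), z * I is just __complex__ (0, a).  */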
+ if (TREE_CODE (arg0) == COMPLEX_EXPR
+ && real_zerop (TREE_OPERAND (arg0, 1)))
+ return fold_build2_loc (loc, COMPLEX_EXPR, type,
+ TREE_OPERAND (arg0, 1), TREE_OPERAND (arg0, 0));
+
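+ /* Rotating an already rotated or negated value:
+ crot90 (crot90 (z)) is -z, crot90 (crot270 (z)) is z,
+ and crot90 (-z) is crot270 (z).  */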
+ if (TREE_CODE (arg0) == CALL_EXPR
+ && CALL_EXPR_FN (arg0) == NULL_TREE)
+ {
+ if (CALL_EXPR_IFN (arg0) == IFN_COMPLEX_ROT90)
+ return negate_expr (CALL_EXPR_ARG (arg0, 0));
+ else if (CALL_EXPR_IFN (arg0) == IFN_COMPLEX_ROT270)
+ return CALL_EXPR_ARG (arg0, 0);
+ }
+ else if (TREE_CODE (arg0) == NEGATE_EXPR)
+ return build_call_expr_internal_loc (loc, IFN_COMPLEX_ROT270,
+ TREE_TYPE (arg0), 1, TREE_OPERAND (arg0, 0));
+ else
+ return build_call_expr_internal_loc (loc, IFN_COMPLEX_ROT90,
+ TREE_TYPE (arg0), 1, arg0);
+ }
+ else if (real_minus_onep (TREE_IMAGPART (arg1)))
+ {
+ if (TREE_CODE (arg0) == COMPLEX_EXPR
+ && real_zerop (TREE_OPERAND (arg0, 1)))
+ return fold_build2_loc (loc, COMPLEX_EXPR, type,
+ TREE_OPERAND (arg0, 1), negate_expr (TREE_OPERAND (arg0, 0)));
+
+ return build_call_expr_internal_loc (loc, IFN_COMPLEX_ROT270,
+ TREE_TYPE (arg0), 1, fold (arg0));
+ }
+ }
+
+ /* Likewise fold z * +-I to __complex__ (-+__imag z, +-__real z)
+ for complex integer types.  */
+ if (!HONOR_NANS (arg0)
+ && !HONOR_SIGNED_ZEROS (arg0)
+ && TREE_CODE (arg1) == COMPLEX_CST
+ && COMPLEX_INTEGER_TYPE_P (TREE_TYPE (arg0))
+ && integer_zerop (TREE_REALPART (arg1)))
+ {
+ if (integer_onep (TREE_IMAGPART (arg1)))
+ {
+ tree rtype = TREE_TYPE (TREE_TYPE (arg0));
+ tree cplx_build = fold_build2_loc (loc, COMPLEX_EXPR, type,
+ negate_expr (fold_build1_loc (loc, IMAGPART_EXPR,
+ rtype, arg0)),
+ fold_build1_loc (loc, REALPART_EXPR, rtype, arg0));
+ if (cplx_build
+ && (TREE_CODE (cplx_build) != COMPLEX_EXPR
+ || TREE_CODE (TREE_OPERAND (cplx_build, 0)) != NEGATE_EXPR))
+ return cplx_build;
+
+ if (TREE_CODE (arg0) == COMPLEX_EXPR
+ && integer_zerop (TREE_OPERAND (arg0, 1)))
+ return fold_build2_loc (loc, COMPLEX_EXPR, type,
+ TREE_OPERAND (arg0, 1), TREE_OPERAND (arg0, 0));
+
+ if (TREE_CODE (arg0) == CALL_EXPR
+ && CALL_EXPR_FN (arg0) == NULL_TREE)
+ {
+ if (CALL_EXPR_IFN (arg0) == IFN_COMPLEX_ROT90)
+ return negate_expr (CALL_EXPR_ARG (arg0, 0));
+ else if (CALL_EXPR_IFN (arg0) == IFN_COMPLEX_ROT270)
+ return CALL_EXPR_ARG (arg0, 0);
+ }
+ else if (TREE_CODE (arg0) == NEGATE_EXPR)
+ return build_call_expr_internal_loc (loc, IFN_COMPLEX_ROT270,
+ TREE_TYPE (arg0), 1, TREE_OPERAND (arg0, 0));
+ else
+ return build_call_expr_internal_loc (loc, IFN_COMPLEX_ROT90,
+ TREE_TYPE (arg0), 1, arg0);
+ }
+ else if (integer_minus_onep (TREE_IMAGPART (arg1)))
+ {
+ if (TREE_CODE (arg0) == COMPLEX_EXPR
+ && integer_zerop (TREE_OPERAND (arg0, 1)))
+ return fold_build2_loc (loc, COMPLEX_EXPR, type,
+ TREE_OPERAND (arg0, 1), negate_expr (TREE_OPERAND (arg0, 0)));
+
+ return build_call_expr_internal_loc (loc, IFN_COMPLEX_ROT270,
+ TREE_TYPE (arg0), 1, fold (arg0));
+ }
+ }
+
goto associate;
case BIT_IOR_EXPR:
@@ -385,6 +385,8 @@ DEF_INTERNAL_FLT_FN (SCALB, ECF_CONST, scalb, binary)
DEF_INTERNAL_FLT_FLOATN_FN (FMIN, ECF_CONST, fmin, binary)
DEF_INTERNAL_FLT_FLOATN_FN (FMAX, ECF_CONST, fmax, binary)
DEF_INTERNAL_OPTAB_FN (XORSIGN, ECF_CONST, xorsign, binary)
+DEF_INTERNAL_OPTAB_FN (COMPLEX_ROT90, ECF_CONST, crot90, unary)
+DEF_INTERNAL_OPTAB_FN (COMPLEX_ROT270, ECF_CONST, crot270, unary)
DEF_INTERNAL_OPTAB_FN (COMPLEX_ADD_ROT90, ECF_CONST, cadd90, binary)
DEF_INTERNAL_OPTAB_FN (COMPLEX_ADD_ROT270, ECF_CONST, cadd270, binary)
DEF_INTERNAL_OPTAB_FN (COMPLEX_MUL, ECF_CONST, cmul, binary)
@@ -330,6 +330,8 @@ OPTAB_D (atan_optab, "atan$a2")
OPTAB_D (atanh_optab, "atanh$a2")
OPTAB_D (copysign_optab, "copysign$F$a3")
OPTAB_D (xorsign_optab, "xorsign$F$a3")
+OPTAB_D (crot90_optab, "crot90$a2")
+OPTAB_D (crot270_optab, "crot270$a2")
OPTAB_D (cadd90_optab, "cadd90$a3")
OPTAB_D (cadd270_optab, "cadd270$a3")
OPTAB_D (cmul_optab, "cmul$a3")
@@ -241,7 +241,10 @@ init_dont_simulate_again (void)
switch (gimple_code (stmt))
{
case GIMPLE_CALL:
- if (gimple_call_lhs (stmt))
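+	  /* A complex rotation operates on the whole complex value, so
+	     seeing one means the lowering pass has work to do.  */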
+ if (gimple_call_combined_fn (stmt) == CFN_COMPLEX_ROT90
+ || gimple_call_combined_fn (stmt) == CFN_COMPLEX_ROT270)
+ saw_a_complex_op = true;
+ else if (gimple_call_lhs (stmt))
sim_again_p = is_complex_reg (gimple_call_lhs (stmt));
break;
@@ -1727,6 +1730,67 @@ expand_complex_asm (gimple_stmt_iterator *gsi)
}
}
+/* Expand a complex rotation represented as an internal function call.
+   This assumes that the lowered rotation is still better than a full
+   complex multiplication; otherwise the backend would have defined the
+   crot90 and crot270 patterns.  */
+
+static void
+expand_complex_rotation (gimple_stmt_iterator *gsi)
+{
+ gimple *stmt = gsi_stmt (*gsi);
+ tree ac = gimple_call_arg (stmt, 0);
+ gimple_seq stmts = NULL;
+ location_t loc = gimple_location (gsi_stmt (*gsi));
+
+ tree lhs = gimple_get_lhs (stmt);
+ tree type = TREE_TYPE (ac);
+ tree inner_type = TREE_TYPE (type);
+
+ tree rr, ri, rb;
+ optab op = optab_for_tree_code (MULT_EXPR, inner_type, optab_default);
+ if (optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
+ {
+ /* The target can multiply in this complex mode directly, so fold the
+ rotation into a multiplication by +-i.  */
+ tree imag_one = (gimple_call_internal_fn (stmt) == IFN_COMPLEX_ROT90
+ ? build_one_cst (inner_type)
+ : build_minus_one_cst (inner_type));
+ tree cst_i = build_complex (type, build_zero_cst (inner_type), imag_one);
+ rb = gimple_build (&stmts, loc, MULT_EXPR, type, ac, cst_i);
+
+ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+
+ gassign *new_assign = gimple_build_assign (lhs, rb);
+ gsi_replace (gsi, new_assign, true);
+
+ update_complex_assignment (gsi, NULL, NULL, rb);
+ }
+ else
+ {
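+ /* No multiply pattern for this complex mode; lower the rotation to a
+ swap of the components with one negation:
+ crot90 (a, b) -> (-b, a), crot270 (a, b) -> (b, -a).  */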
+ tree ar = extract_component (gsi, ac, REAL_P, true);
+ tree ai = extract_component (gsi, ac, IMAG_P, true);
+
+ if (gimple_call_internal_fn (stmt) == IFN_COMPLEX_ROT90)
+ {
+ rr = gimple_build (&stmts, loc, NEGATE_EXPR, inner_type, ai);
+ ri = ar;
+ }
+ else if (gimple_call_internal_fn (stmt) == IFN_COMPLEX_ROT270)
+ {
+ rr = ai;
+ ri = gimple_build (&stmts, loc, NEGATE_EXPR, inner_type, ar);
+ }
+ else
+ gcc_unreachable ();
+
+ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+
+ gassign *new_assign = gimple_build_assign (lhs, COMPLEX_EXPR, rr, ri);
+ gsi_replace (gsi, new_assign, true);
+
+ update_complex_assignment (gsi, rr, ri);
+ }
+}
+
/* Returns true if a complex component is a constant */
static bool
@@ -1843,6 +1907,19 @@ expand_complex_operations_1 (gimple_stmt_iterator *gsi)
if (gimple_code (stmt) == GIMPLE_COND)
return;
+ if (is_gimple_call (stmt)
+ && (gimple_call_combined_fn (stmt) == CFN_COMPLEX_ROT90
+ || gimple_call_combined_fn (stmt) == CFN_COMPLEX_ROT270))
+ {
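+ /* Lower the rotation if the target cannot expand crot90/crot270
+ directly; otherwise keep the call and track the components of its
+ result.  */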
+ if (!direct_internal_fn_supported_p (gimple_call_internal_fn (stmt), type,
+ bb_optimization_type (gimple_bb (stmt))))
+ expand_complex_rotation (gsi);
+ else
+ update_complex_components (gsi, stmt, NULL, NULL, gimple_call_lhs (stmt));
+
+ return;
+ }
+
if (TREE_CODE (type) == COMPLEX_TYPE)
expand_complex_move (gsi, type);
else if (is_gimple_assign (stmt)
@@ -3286,6 +3286,119 @@ last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
return false;
}
+/* Replace each addition that uses CROT_RESULT with a COMPLEX_ADD_ROT
+   internal call adding the other addend to OP1 rotated, in a way
+   similar to how FMAs are formed.  */
+
+static void
+convert_crot_1 (tree crot_result, tree op1, internal_fn cadd_fn)
+{
+ gimple *use_stmt;
+ imm_use_iterator imm_iter;
+ gcall *cadd_stmt;
+
+ FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, crot_result)
+ {
+ gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
+ tree add_op, result = crot_result;
+
+ if (is_gimple_debug (use_stmt))
+ continue;
+
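+ /* USE_STMT is a PLUS_EXPR; the addend that is not the rotation result
+ becomes the first CADD operand, and OP1 (the rotation's input) the
+ second, which the CADD rotates itself.  */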
+ add_op = (gimple_assign_rhs1 (use_stmt) != result)
+ ? gimple_assign_rhs1 (use_stmt) : gimple_assign_rhs2 (use_stmt);
+
+ cadd_stmt = gimple_build_call_internal (cadd_fn, 2, add_op, op1);
+ gimple_set_lhs (cadd_stmt, gimple_get_lhs (use_stmt));
+ gimple_call_set_nothrow (cadd_stmt, !stmt_can_throw_internal (cfun,
+ use_stmt));
+ gsi_replace (&gsi, cadd_stmt, true);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Generated COMPLEX_ADD_ROT ");
+ print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
+ fprintf (dump_file, "\n");
+ }
+ }
+}
+
+/* Convert a complex rotation whose result is only used in additions
+   into COMPLEX_ADD_ROT internal calls, in a way similar to how FMAs are
+   formed.  Returns true if successful and CROT_STMT should be
+   removed.  */
+
+static bool
+convert_crot (gimple *crot_stmt, tree op1, combined_fn crot_kind)
+{
+ internal_fn cadd_fn;
+ switch (crot_kind)
+ {
+ case CFN_COMPLEX_ROT90:
+ cadd_fn = IFN_COMPLEX_ADD_ROT90;
+ break;
+ case CFN_COMPLEX_ROT270:
+ cadd_fn = IFN_COMPLEX_ADD_ROT270;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ tree crot_result = gimple_get_lhs (crot_stmt);
+ /* If there isn't a LHS then this can't be a CADD.  There can be no LHS
+ if the statement was left just for the side-effects.  */
+ if (!crot_result)
+ return false;
+ tree type = TREE_TYPE (crot_result);
+ gimple *use_stmt;
+ use_operand_p use_p;
+ imm_use_iterator imm_iter;
+
+ if (COMPLEX_FLOAT_TYPE_P (type)
+ && flag_fp_contract_mode == FP_CONTRACT_OFF)
+ return false;
+
+ /* We don't want to do bitfield reduction ops. */
+ if (INTEGRAL_TYPE_P (type)
+ && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
+ return false;
+
+ /* If the target doesn't support it, don't generate it. */
+ optimization_type opt_type = bb_optimization_type (gimple_bb (crot_stmt));
+ if (!direct_internal_fn_supported_p (cadd_fn, type, opt_type))
+ return false;
+
+ /* If the crot has zero uses, it is kept around probably because
+ of -fnon-call-exceptions. Don't optimize it away in that case,
+ it is DCE job. */
+ if (has_zero_uses (crot_result))
+ return false;
+
+ /* Make sure that the crot statement becomes dead after
+ the transformation, thus that all uses are transformed to CADDs.
+ This means we assume that a CADD operation has the same cost
+ as an addition.  */
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, crot_result)
+ {
+ use_stmt = USE_STMT (use_p);
+
+ if (is_gimple_debug (use_stmt))
+ continue;
+
+ if (gimple_bb (use_stmt) != gimple_bb (crot_stmt))
+ return false;
+
+ if (!is_gimple_assign (use_stmt))
+ return false;
+
+ if (gimple_assign_rhs_code (use_stmt) != PLUS_EXPR)
+ return false;
+ }
+
+ convert_crot_1 (crot_result, op1, cadd_fn);
+ return true;
+}
+
/* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
with uses in additions and subtractions to form fused multiply-add
operations. Returns true if successful and MUL_STMT should be removed.
@@ -5636,6 +5749,22 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
cancel_fma_deferring (&fma_state);
break;
+ case CFN_COMPLEX_ROT90:
+ case CFN_COMPLEX_ROT270:
+ if (gimple_call_lhs (stmt)
+ && convert_crot (stmt,
+ gimple_call_arg (stmt, 0),
+ gimple_call_combined_fn (stmt)))
+ {
+ unlink_stmt_vdef (stmt);
+ if (gsi_remove (&gsi, true)
+ && gimple_purge_dead_eh_edges (bb))
+ *m_cfg_changed_p = true;
+ release_defs (stmt);
+ continue;
+ }
+ break;
+
default:
break;
}