@@ -2339,8 +2339,8 @@ classify_argument (machine_mode mode, const_tree type,
mode_alignment = 128;
else if (mode == XCmode)
mode_alignment = 256;
- if (COMPLEX_MODE_P (mode))
- mode_alignment /= 2;
+ /*if (COMPLEX_MODE_P (mode))
+ mode_alignment /= 2;*/
/* Misaligned fields are always returned in memory. */
if (bit_offset % mode_alignment)
return 0;
@@ -3007,6 +3007,7 @@ pass_in_reg:
case E_V4BFmode:
case E_V2SImode:
case E_V2SFmode:
+ case E_SCmode:
case E_V1TImode:
case E_V1DImode:
if (!type || !AGGREGATE_TYPE_P (type))
@@ -3257,6 +3258,7 @@ pass_in_reg:
case E_V4BFmode:
case E_V2SImode:
case E_V2SFmode:
+ case E_SCmode:
case E_V1TImode:
case E_V1DImode:
if (!type || !AGGREGATE_TYPE_P (type))
@@ -4158,8 +4160,8 @@ function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
&& !INTEGRAL_TYPE_P (valtype)
&& !VECTOR_FLOAT_TYPE_P (valtype))
break;
- if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
- && !COMPLEX_MODE_P (mode))
+ if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)))
+ // && !COMPLEX_MODE_P (mode))
regno = FIRST_SSE_REG;
break;
case 8:
@@ -4266,7 +4268,7 @@ ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
|| INTEGRAL_TYPE_P (type)
|| VECTOR_FLOAT_TYPE_P (type))
&& (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
- && !COMPLEX_MODE_P (mode)
+ //&& !COMPLEX_MODE_P (mode)
&& (GET_MODE_SIZE (mode) == 16 || size == 16))
return false;
@@ -15722,6 +15724,7 @@ ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
case E_V8SFmode:
case E_V4SFmode:
case E_V2SFmode:
+ case E_SCmode:
case E_V8DFmode:
case E_V4DFmode:
case E_V2DFmode:
@@ -15770,6 +15773,7 @@ ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
case E_V8SFmode:
case E_V4SFmode:
case E_V2SFmode:
+ case E_SCmode:
case E_V2SImode:
vec_mode = mode;
imode = SImode;
@@ -19821,7 +19825,8 @@ ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
else
{
if (COMPLEX_MODE_P (mode))
- return 2;
+ return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
+ //return 2;
else
return 1;
}
@@ -20157,7 +20162,8 @@ ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
}
if (COMPLEX_MODE_P (mode))
- return 2;
+ return 1;
+ //return 2;
/* Register pair for mask registers. */
if (mode == P2QImode || mode == P2HImode)
return 2;
@@ -23613,6 +23619,273 @@ ix86_preferred_simd_mode (scalar_mode mode)
}
}
+/* Implement TARGET_GEN_RTX_COMPLEX.  Build an rtx holding a complex
+   value of mode MODE from REAL_PART and IMAG_PART.  Unlike the generic
+   hook, which builds a CONCAT, allocate a single pseudo of the whole
+   complex mode and write each part into it.  Either part may be
+   NULL_RTX, in which case that part is left uninitialised.  */
+static rtx
+x86_gen_rtx_complex (machine_mode mode, rtx real_part, rtx imag_part)
+{
+  machine_mode imode = GET_MODE_INNER (mode);
+
+  /* (0, 0): return a single zero constant of the complex mode.
+     NOTE(review): for a floating inner mode this yields a CONST_DOUBLE
+     carrying the complex MODE, and for an integer inner mode a plain
+     const_int 0 (VOIDmode) -- confirm all consumers accept both
+     representations (see the CONST_DOUBLE special cases in
+     x86_read_complex_part).  */
+  if ((real_part == imag_part) && (real_part == CONST0_RTX (imode)))
+    {
+      if (CONST_DOUBLE_P (real_part))
+        return const_double_from_real_value (dconst0, mode);
+      else if (CONST_INT_P (real_part))
+        return GEN_INT (0);
+      else
+        gcc_unreachable ();
+    }
+
+  /* Temporarily clear generating_concat_p so gen_reg_rtx allocates one
+     pseudo of the full complex mode instead of a CONCAT of two part
+     pseudos.  */
+  bool saved_generating_concat_p = generating_concat_p;
+  generating_concat_p = false;
+  rtx complex_reg = gen_reg_rtx (mode);
+  generating_concat_p = saved_generating_concat_p;
+
+  if (real_part)
+    {
+      gcc_assert (imode == GET_MODE (real_part));
+      write_complex_part (complex_reg, real_part, REAL_P, false);
+    }
+
+  if (imag_part)
+    {
+      gcc_assert (imode == GET_MODE (imag_part));
+      write_complex_part (complex_reg, imag_part, IMAG_P, false);
+    }
+
+  return complex_reg;
+}
+
+/* Implement TARGET_READ_COMPLEX_PART.  Return an rtx for the REAL_P,
+   IMAG_P or BOTH_P part of the complex value CPLX, coping with CONCATs,
+   folded constants, spilled constants, MEMs, hard registers and
+   pseudos.  Mirrors the generic read_complex_part, extended for
+   complex values living in a single register.  */
+static rtx
+x86_read_complex_part (rtx cplx, complex_part_t part)
+{
+  machine_mode cmode;
+  scalar_mode imode;
+  unsigned ibitsize;
+
+  /* A CONCAT keeps its two parts as separate rtxes; just pick one.  */
+  if (GET_CODE (cplx) == CONCAT)
+    return XEXP (cplx, part);
+
+  cmode = GET_MODE (cplx);
+  imode = GET_MODE_INNER (cmode);
+  ibitsize = GET_MODE_BITSIZE (imode);
+
+  /* Asking for the whole of a complex-mode value: nothing to extract.  */
+  if (COMPLEX_MODE_P (cmode) && (part == BOTH_P))
+    return cplx;
+
+  /* On 32-bit targets vector constants are folded during expand, so
+     CPLX may arrive as one integer constant covering both parts; in
+     that case cmode and imode are equal and each part is only half as
+     wide.  */
+  if (cmode == imode)
+    ibitsize /= 2;
+
+  if (cmode == E_VOIDmode)
+    return cplx; /* FIXME: case used when initialising a mock value in a
+                    complex register -- confirm this is still needed.  */
+
+  if ((cmode == E_DCmode) && (GET_CODE (cplx) == CONST_DOUBLE))
+    /* FIXME: stop generating DCmode CONST_DOUBLEs altogether; no insn
+       patterns are wired up for them, so hand back a DFmode zero.  */
+    return CONST0_RTX (E_DFmode);
+  /* FIXME: audit SCmode CONST_DOUBLEs as well.  */
+
+  /* Special case reads from complex constants that got spilled to memory.  */
+  if (MEM_P (cplx) && GET_CODE (XEXP (cplx, 0)) == SYMBOL_REF)
+    {
+      /* NOTE(review): SYMBOL_REF_DECL normally holds a decl; relying on
+         it being a COMPLEX_CST here -- confirm who sets it up.  */
+      tree decl = SYMBOL_REF_DECL (XEXP (cplx, 0));
+      if (decl && TREE_CODE (decl) == COMPLEX_CST)
+        {
+          tree cplx_part = (part == IMAG_P) ? TREE_IMAGPART (decl)
+                           : (part == REAL_P) ? TREE_REALPART (decl)
+                           : TREE_COMPLEX_BOTH_PARTS (decl);
+          if (CONSTANT_CLASS_P (cplx_part))
+            return expand_expr (cplx_part, NULL_RTX, imode, EXPAND_NORMAL);
+        }
+    }
+
+  /* For MEMs simplify_gen_subreg may generate an invalid new address
+     because, e.g., the original address is considered mode-dependent
+     by the target, which restricts simplify_subreg from invoking
+     adjust_address_nv.  Instead of preparing fallback support for an
+     invalid address, we call adjust_address_nv directly.  */
+  if (MEM_P (cplx))
+    {
+      if (part == BOTH_P)
+        return adjust_address_nv (cplx, cmode, 0);
+      else
+        return adjust_address_nv (cplx, imode, (part == IMAG_P)
+                                  ? GET_MODE_SIZE (imode) : 0);
+    }
+
+  /* If the sub-object is at least word sized, then we know that subregging
+     will work.  This special case is important, since extract_bit_field
+     wants to operate on integer modes, and there's rarely an OImode to
+     correspond to TCmode.  */
+  if (ibitsize >= BITS_PER_WORD
+      /* For hard regs we have exact predicates.  Assume we can split
+         the original object if it spans an even number of hard regs.
+         This special case is important for SCmode on 64-bit platforms
+         where the natural size of floating-point regs is 32-bit.  */
+      || (REG_P (cplx)
+          && REGNO (cplx) < FIRST_PSEUDO_REGISTER
+          && REG_NREGS (cplx) % 2 == 0))
+    {
+      rtx ret = simplify_gen_subreg (imode, cplx, cmode, (part == IMAG_P)
+                                     ? GET_MODE_SIZE (imode) : 0);
+      if (ret)
+        return ret;
+      else
+        /* simplify_gen_subreg may fail for sub-word MEMs.  */
+        gcc_assert (MEM_P (cplx) && ibitsize < BITS_PER_WORD);
+    }
+
+  /* Last resort: pull the bits out with extract_bit_field.  */
+  if (part == BOTH_P)
+    return extract_bit_field (cplx, 2 * ibitsize, 0, true, NULL_RTX, cmode,
+                              cmode, false, NULL);
+  else
+    return extract_bit_field (cplx, ibitsize, (part == IMAG_P) ? ibitsize : 0,
+                              true, NULL_RTX, imode, imode, false, NULL);
+}
+
+/* Implement TARGET_WRITE_COMPLEX_PART.  Store VAL into the REAL_P,
+   IMAG_P or BOTH_P part of the complex lvalue CPLX.  UNDEFINED_P is
+   forwarded to store_bit_field and marks the destination as not yet
+   holding a defined value.  */
+static void
+x86_write_complex_part (rtx cplx, rtx val, complex_part_t part, bool undefined_p)
+{
+  machine_mode cmode;
+  scalar_mode imode;
+  unsigned ibitsize;
+
+  cmode = GET_MODE (cplx);
+  imode = GET_MODE_INNER (cmode);
+  ibitsize = GET_MODE_BITSIZE (imode);
+
+  /* Special case for constant vectors: either funnel both elements
+     through a scalar temporary as wide as the complex value, or fall
+     back to writing the two elements one at a time.  */
+  if (GET_CODE (val) == CONST_VECTOR)
+    {
+      if (part == BOTH_P)
+        {
+          /* Pick a scalar mode covering the whole complex value.
+             Fixed: the original line ended in a stray ";;".  */
+          machine_mode temp_mode = E_BLKmode;
+          switch (cmode)
+            {
+            case E_CQImode:
+              temp_mode = E_HImode;
+              break;
+            case E_CHImode:
+              temp_mode = E_SImode;
+              break;
+            case E_CSImode:
+              temp_mode = E_DImode;
+              break;
+            case E_SCmode:
+              temp_mode = E_DFmode;
+              break;
+            case E_CDImode:
+              temp_mode = E_TImode;
+              break;
+            case E_DCmode:
+            default:
+              /* No single scalar mode wide enough (e.g. DCmode);
+                 handled element-wise below.  */
+              break;
+            }
+
+          if (temp_mode != E_BLKmode)
+            {
+              rtx temp_reg = gen_reg_rtx (temp_mode);
+              store_bit_field (temp_reg, GET_MODE_BITSIZE (temp_mode), 0, 0,
+                               0, GET_MODE (val), val, false, undefined_p);
+              emit_move_insn (cplx,
+                              simplify_gen_subreg (cmode, temp_reg, temp_mode,
+                                                   0));
+            }
+          else
+            {
+              /* Write real part and imag part separately.  */
+              gcc_assert (GET_CODE (val) == CONST_VECTOR);
+              write_complex_part (cplx, const_vector_elt (val, 0), REAL_P, false);
+              write_complex_part (cplx, const_vector_elt (val, 1), IMAG_P, false);
+            }
+        }
+      else
+        write_complex_part (cplx,
+                            const_vector_elt (val,
+                                              ((part == REAL_P) ? 0 : 1)),
+                            part, false);
+      return;
+    }
+
+  if ((part == BOTH_P) && !MEM_P (cplx))
+    {
+      /* Copy VAL into CPLX part by part.  Fixed: the original read the
+         parts of CPLX itself, making this path a self-copy that dropped
+         VAL entirely (the commented-out emit_move_insn (cplx, val) it
+         carried showed the real intent).  */
+      write_complex_part (cplx, read_complex_part (val, REAL_P), REAL_P,
+                          undefined_p);
+      write_complex_part (cplx, read_complex_part (val, IMAG_P), IMAG_P,
+                          undefined_p);
+      return;
+    }
+
+  if ((GET_CODE (val) == CONST_DOUBLE) || (GET_CODE (val) == CONST_INT))
+    {
+      if (part == REAL_P)
+        {
+          emit_move_insn (gen_lowpart (imode, cplx), val);
+          return;
+        }
+      else if (part == IMAG_P)
+        {
+          /* gen_highpart is only valid for hard registers; pseudos (and
+             MEMs/subregs) fall through to the generic paths below.
+             Fixed: guard with REG_P so REGNO is never applied to a
+             non-REG rtx.  */
+          if (REG_P (cplx) && REGNO (cplx) < FIRST_PSEUDO_REGISTER)
+            {
+              emit_move_insn (gen_highpart (imode, cplx), val);
+              return;
+            }
+        }
+      else
+        gcc_unreachable ();
+    }
+
+  /* A CONCAT destination keeps its parts as separate rtxes (BOTH_P was
+     handled above, so PART indexes a single part here).  */
+  if (GET_CODE (cplx) == CONCAT)
+    {
+      emit_move_insn (XEXP (cplx, part), val);
+      return;
+    }
+
+  /* For MEMs simplify_gen_subreg may generate an invalid new address
+     because, e.g., the original address is considered mode-dependent
+     by the target, which restricts simplify_subreg from invoking
+     adjust_address_nv.  Instead of preparing fallback support for an
+     invalid address, we call adjust_address_nv directly.  */
+  if (MEM_P (cplx))
+    {
+      if (part == BOTH_P)
+        emit_move_insn (adjust_address_nv (cplx, cmode, 0), val);
+      else
+        emit_move_insn (adjust_address_nv (cplx, imode, (part == IMAG_P)
+                                           ? GET_MODE_SIZE (imode) : 0), val);
+      return;
+    }
+
+  /* If the sub-object is at least word sized, then we know that subregging
+     will work.  This special case is important, since store_bit_field
+     wants to operate on integer modes, and there's rarely an OImode to
+     correspond to TCmode.  */
+  if (ibitsize >= BITS_PER_WORD
+      /* For hard regs we have exact predicates.  Assume we can split
+         the original object if it spans an even number of hard regs.
+         This special case is important for SCmode on 64-bit platforms
+         where the natural size of floating-point regs is 32-bit.  */
+      || (REG_P (cplx)
+          && REGNO (cplx) < FIRST_PSEUDO_REGISTER
+          && REG_NREGS (cplx) % 2 == 0))
+    {
+      rtx cplx_part = simplify_gen_subreg (imode, cplx, cmode,
+                                           (part == IMAG_P)
+                                           ? GET_MODE_SIZE (imode) : 0);
+      if (cplx_part)
+        {
+          emit_move_insn (cplx_part, val);
+          return;
+        }
+      else
+        /* simplify_gen_subreg may fail for sub-word MEMs.  */
+        gcc_assert (MEM_P (cplx) && ibitsize < BITS_PER_WORD);
+    }
+
+  /* Last resort: poke the bits in with store_bit_field.  */
+  store_bit_field (cplx, ibitsize, (part == IMAG_P) ? ibitsize : 0, 0, 0,
+                   imode, val, false, undefined_p);
+}
+
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
vectors. If AVX512F is enabled then try vectorizing with 512bit,
256bit and 128bit vectors. */
@@ -25621,6 +25894,15 @@ ix86_libgcc_floating_mode_supported_p
#undef TARGET_IFUNC_REF_LOCAL_OK
#define TARGET_IFUNC_REF_LOCAL_OK ix86_ifunc_ref_local_ok
+#undef TARGET_GEN_RTX_COMPLEX
+#define TARGET_GEN_RTX_COMPLEX x86_gen_rtx_complex
+
+#undef TARGET_READ_COMPLEX_PART
+#define TARGET_READ_COMPLEX_PART x86_read_complex_part
+
+#undef TARGET_WRITE_COMPLEX_PART
+#define TARGET_WRITE_COMPLEX_PART x86_write_complex_part
+
#if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_ASM_RELOC_RW_MASK
# define TARGET_ASM_RELOC_RW_MASK ix86_reloc_rw_mask
@@ -1054,7 +1054,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|| (MODE) == V4QImode || (MODE) == V2HImode || (MODE) == V1SImode \
|| (MODE) == V2DImode || (MODE) == V2QImode \
|| (MODE) == DFmode || (MODE) == DImode \
- || (MODE) == HFmode || (MODE) == BFmode)
+ || (MODE) == HFmode || (MODE) == BFmode \
+ || (MODE) == SCmode)
#define VALID_SSE_REG_MODE(MODE) \
((MODE) == V1TImode || (MODE) == TImode \
@@ -1063,7 +1064,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|| (MODE) == TFmode || (MODE) == TDmode)
#define VALID_MMX_REG_MODE_3DNOW(MODE) \
- ((MODE) == V2SFmode || (MODE) == SFmode)
+ ((MODE) == V2SFmode || (MODE) == SFmode || (MODE) == SCmode)
/* To match ia32 psABI, V4HFmode should be added here. */
#define VALID_MMX_REG_MODE(MODE) \
@@ -1106,13 +1107,15 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|| (MODE) == V16SImode || (MODE) == V32HImode || (MODE) == V8DFmode \
|| (MODE) == V16SFmode \
|| (MODE) == V32HFmode || (MODE) == V16HFmode || (MODE) == V8HFmode \
- || (MODE) == V32BFmode || (MODE) == V16BFmode || (MODE) == V8BFmode)
+ || (MODE) == V32BFmode || (MODE) == V16BFmode || (MODE) == V8BFmode \
+ || (MODE) == SCmode)
#define X87_FLOAT_MODE_P(MODE) \
(TARGET_80387 && ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode))
#define SSE_FLOAT_MODE_P(MODE) \
- ((TARGET_SSE && (MODE) == SFmode) || (TARGET_SSE2 && (MODE) == DFmode))
+ ((TARGET_SSE && (MODE) == SFmode) || (TARGET_SSE2 && (MODE) == DFmode) \
+ || (TARGET_SSE2 && (MODE) == SCmode))
#define SSE_FLOAT_MODE_SSEMATH_OR_HF_P(MODE) \
((SSE_FLOAT_MODE_P (MODE) && TARGET_SSE_MATH) \
@@ -30209,3 +30209,147 @@
"vcvtneo<bf16_ph>2ps\t{%1, %0|%0, %1}"
[(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
+
+;; Move an SCmode (single-precision complex) value by reinterpreting
+;; both operands as V2SF and reusing the vector move.  NOTE(review):
+;; both operands are nonimmediate_operand, so constant sources must
+;; have been legitimized earlier -- confirm.
+(define_expand "movsc"
+  [(match_operand:SC 0 "nonimmediate_operand" "")
+   (match_operand:SC 1 "nonimmediate_operand" "")]
+  ""
+  {
+    emit_insn (gen_movv2sf (simplify_gen_subreg (V2SFmode, operands[0], SCmode, 0),
+                            simplify_gen_subreg (V2SFmode, operands[1], SCmode, 0)));
+    DONE;
+  }
+)
+
+;; Complex addition is elementwise ((ar+br) + (ai+bi)i), so it maps
+;; directly onto the V2SF vector add of the operands viewed as V2SF.
+(define_expand "addsc3"
+  [(match_operand:SC 0 "register_operand" "=r")
+   (match_operand:SC 1 "register_operand" "r")
+   (match_operand:SC 2 "register_operand" "r")]
+  ""
+  {
+    emit_insn (gen_addv2sf3 (simplify_gen_subreg (V2SFmode, operands[0], SCmode, 0),
+                             simplify_gen_subreg (V2SFmode, operands[1], SCmode, 0),
+                             simplify_gen_subreg (V2SFmode, operands[2], SCmode, 0)));
+    DONE;
+  }
+)
+
+;; Complex subtraction is elementwise, like addition: reuse the V2SF
+;; vector subtract on the operands viewed as V2SF.
+(define_expand "subsc3"
+  [(match_operand:SC 0 "register_operand" "=r")
+   (match_operand:SC 1 "register_operand" "r")
+   (match_operand:SC 2 "register_operand" "r")]
+  ""
+  {
+    emit_insn (gen_subv2sf3 (simplify_gen_subreg (V2SFmode, operands[0], SCmode, 0),
+                             simplify_gen_subreg (V2SFmode, operands[1], SCmode, 0),
+                             simplify_gen_subreg (V2SFmode, operands[2], SCmode, 0)));
+    DONE;
+  }
+)
+
+;; Complex negation flips the sign of both the real and imaginary
+;; parts, i.e. negates both V2SF lanes.
+(define_expand "negsc2"
+  [(match_operand:SC 0 "register_operand" "=r")
+   (match_operand:SC 1 "register_operand" "r")]
+  ""
+  {
+    emit_insn (gen_negv2sf2 (simplify_gen_subreg (V2SFmode, operands[0], SCmode, 0),
+                             simplify_gen_subreg (V2SFmode, operands[1], SCmode, 0)));
+    DONE;
+  }
+)
+
+;; Expand a shufps-style shuffle whose sources are complex halves:
+;; decode the 8-bit immediate in operand 3 into the four 2-bit element
+;; selectors expected by sse_shufsc_sc (selectors 2 and 3 index the
+;; second source, hence the "+ 4").
+;; NOTE(review): operands 1 and 2 are declared :SC here, but the only
+;; caller (mulsc3) passes V2SF subregs and sse_shufsc_sc matches :V2SF
+;; -- confirm the intended mode of these match_operands.
+(define_expand "sse_shufsc"
+  [(match_operand:V4SF 0 "register_operand")
+   (match_operand:SC 1 "register_operand")
+   (match_operand:SC 2 "vector_operand")
+   (match_operand:SI 3 "const_int_operand")]
+  "TARGET_SSE"
+{
+  int mask = INTVAL (operands[3]);
+  emit_insn (gen_sse_shufsc_sc (operands[0],
+                                operands[1],
+                                operands[2],
+                                GEN_INT ((mask >> 0) & 3),
+                                GEN_INT ((mask >> 2) & 3),
+                                GEN_INT (((mask >> 4) & 3) + 4),
+                                GEN_INT (((mask >> 6) & 3) + 4)));
+  DONE;
+})
+
+;; shufps selecting four elements out of the concatenation of two V2SF
+;; halves into a V4SF.  Operands 3-6 are the four element selectors;
+;; the output statement re-encodes them into the single 8-bit shufps
+;; immediate (same encoding as the generic sse_shufps_v4sf pattern).
+(define_insn "sse_shufsc_sc"
+  [(set (match_operand:V4SF 0 "register_operand" "=x,v")
+        (vec_select:V4SF
+          (vec_concat:V4SF
+            (match_operand:V2SF 1 "register_operand" "0,v")
+            (match_operand:V2SF 2 "vector_operand" "xBm,vm"))
+          (parallel [(match_operand 3 "const_0_to_3_operand")
+                     (match_operand 4 "const_0_to_3_operand")
+                     (match_operand 5 "const_4_to_7_operand")
+                     (match_operand 6 "const_4_to_7_operand")])))]
+  "TARGET_SSE"
+{
+  /* Fold the four 2-bit selectors back into one shufps immediate;
+     selectors 5 and 6 index the second source, so subtract 4.  */
+  int mask = 0;
+  mask |= INTVAL (operands[3]) << 0;
+  mask |= INTVAL (operands[4]) << 2;
+  mask |= (INTVAL (operands[5]) - 4) << 4;
+  mask |= (INTVAL (operands[6]) - 4) << 6;
+  operands[3] = GEN_INT (mask);
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "shufps\t{%3, %2, %0|%0, %2, %3}";
+    case 1:
+      return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sseshuf")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "orig,maybe_evex")
+   (set_attr "mode" "V4SF")])
+
+;; Complex multiply via shuffles and addsubps (hence TARGET_SSE3).
+;; With operand1 = (ar, ai) and operand2 = (br, bi):
+;;   a <- (ar, ai, ar, ai)            (mask 0x44)
+;;   b <- (br, bi, bi, br)            (mask 0x14)
+;;   a*b = (ar*br, ai*bi, ar*bi, ai*br)
+;;   b <- (ai*bi, ai*br, ...)         (mask 0x0D, low two lanes used)
+;;   a <- (ar*br, ar*bi, ...)         (mask 0x08, low two lanes used)
+;;   addsub(a, b) = (ar*br - ai*bi, ar*bi + ai*br)  -- the product.
+;; NOTE(review): gen_sse_shufsc is handed V2SF subregs although its
+;; operands 1/2 are declared :SC -- confirm the mode mismatch is
+;; intentional.
+(define_expand "mulsc3"
+  [(match_operand:SC 0 "register_operand" "=r")
+   (match_operand:SC 1 "register_operand" "r")
+   (match_operand:SC 2 "register_operand" "r")]
+  "TARGET_SSE3"
+  {
+    rtx a = gen_reg_rtx (V4SFmode);
+    rtx b = gen_reg_rtx (V4SFmode);
+    emit_insn (gen_sse_shufsc (a,
+                               simplify_gen_subreg (V2SFmode, operands[1], SCmode, 0),
+                               simplify_gen_subreg (V2SFmode, operands[1], SCmode, 0),
+                               GEN_INT (0b01000100)));
+    emit_insn (gen_sse_shufsc (b,
+                               simplify_gen_subreg (V2SFmode, operands[2], SCmode, 0),
+                               simplify_gen_subreg (V2SFmode, operands[2], SCmode, 0),
+                               GEN_INT (0b00010100)));
+    emit_insn (gen_mulv4sf3 (a, a, b));
+    emit_insn (gen_sse_shufps (b,
+                               a,
+                               a,
+                               GEN_INT (0b00001101)));
+    emit_insn (gen_sse_shufps (a,
+                               a,
+                               a,
+                               GEN_INT (0b00001000)));
+    emit_insn (gen_vec_addsubv2sf3 (simplify_gen_subreg (V2SFmode, operands[0], SCmode, 0),
+                                    simplify_gen_subreg (V2SFmode, a, V4SFmode, 0),
+                                    simplify_gen_subreg (V2SFmode, b, V4SFmode, 0)));
+    DONE;
+  }
+)
+
+;; Complex conjugate of an SCmode value.  Viewing the (real, imag) pair
+;; as one DFmode value and negating it flips only bit 63 (x86 negdf2 is
+;; a sign-bit XOR), and on little-endian x86 bit 63 is the sign bit of
+;; the upper SFmode lane -- i.e. the imaginary part.  So this computes
+;; conj() in a single operation.  NOTE(review): relies on negdf2 never
+;; expanding to an arithmetic negate here -- confirm.
+(define_expand "conjsc2"
+  [(match_operand:SC 0 "register_operand" "=r")
+   (match_operand:SC 1 "register_operand" "r")]
+  ""
+  {
+    emit_insn (gen_negdf2 (simplify_gen_subreg (DFmode, operands[0], SCmode, 0),
+                           simplify_gen_subreg (DFmode, operands[1], SCmode, 0)));
+    DONE;
+  }
+)