@@ -219,6 +219,12 @@ (define_constraint "eQ"
"An IEEE 128-bit constant that can be loaded into VSX registers."
(match_operand 0 "easy_vector_constant_ieee128"))
+;; A vector pair constant that can be loaded into registers without using a
+;; load operation.
+(define_constraint "eV"
+ "A vector pair constant that can be loaded into VSX registers."
+ (match_operand 0 "easy_vector_pair_constant"))
+
;; Floating-point constraints. These two are defined so that insn
;; length attributes can be calculated exactly.
@@ -327,6 +327,11 @@ (define_predicate "const_0_to_15_operand"
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 0, 15)")))
+;; Match op = 0..31
+(define_predicate "const_0_to_31_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 31)")))
+
;; Return 1 if op is a 34-bit constant integer.
(define_predicate "cint34_operand"
(match_code "const_int")
@@ -729,6 +734,9 @@ (define_predicate "easy_vector_constant"
if (zero_constant (op, mode) || all_ones_constant (op, mode))
return true;
+ if (VECTOR_PAIR_MODE (mode) && easy_vector_pair_constant (op, mode))
+ return true;
+
/* Constants that can be generated with ISA 3.1 instructions are
easy. */
vec_const_128bit_type vsx_const;
@@ -759,6 +767,26 @@ (define_predicate "easy_vector_constant"
return false;
})
+;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
+;; pair of vector registers without using memory.
+(define_predicate "easy_vector_pair_constant"
+ (match_code "const_vector")
+{
+ rtx hi_constant, lo_constant;
+ machine_mode vmode;
+
+ if (!TARGET_MMA || !TARGET_VECTOR_SIZE_32 || !VECTOR_PAIR_MODE (mode))
+ return false;
+
+ vmode = vector_pair_to_vector_mode (mode);
+ if (vmode == VOIDmode)
+ return false;
+
+ return (split_vector_pair_constant (op, &hi_constant, &lo_constant)
+ && easy_vector_constant (hi_constant, vmode)
+ && easy_vector_constant (lo_constant, vmode));
+})
+
;; Same as easy_vector_constant but only for EASY_VECTOR_15_ADD_SELF.
(define_predicate "easy_vector_constant_add_self"
(and (match_code "const_vector")
@@ -1301,8 +1329,10 @@ (define_predicate "splat_input_operand"
;; Return 1 if this operand is valid for a MMA assemble accumulator insn.
(define_special_predicate "mma_assemble_input_operand"
- (match_test "(mode == V16QImode
+ (match_test "(GET_MODE_SIZE (mode) == 16 && VECTOR_MODE_P (mode)
&& (vsx_register_operand (op, mode)
+ || op == CONST0_RTX (mode)
+ || vsx_prefixed_constant (op, mode)
|| (MEM_P (op)
&& (indexed_or_indirect_address (XEXP (op, 0), mode)
|| quad_address_p (XEXP (op, 0), mode, false)))))"))
@@ -631,6 +631,9 @@ rs6000_cpu_cpp_builtins (cpp_reader *pfile)
builtin_define ("__SIZEOF_IBM128__=16");
if (ieee128_float_type_node)
builtin_define ("__SIZEOF_IEEE128__=16");
+ if (TARGET_MMA && TARGET_VECTOR_SIZE_32)
+ builtin_define ("__VECTOR_SIZE_32__");
+
#ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
builtin_define ("__BUILTIN_CPU_SUPPORTS__");
#endif
@@ -61,6 +61,9 @@ extern bool rs6000_move_128bit_ok_p (rtx []);
extern bool rs6000_split_128bit_ok_p (rtx []);
extern void rs6000_expand_float128_convert (rtx, rtx, bool);
extern void rs6000_expand_vector_init (rtx, rtx);
+extern machine_mode vector_pair_to_vector_mode (machine_mode);
+extern bool split_vector_pair_constant (rtx, rtx *, rtx *);
+extern void rs6000_expand_vector_pair_init (rtx, rtx);
extern void rs6000_expand_vector_set (rtx, rtx, rtx);
extern void rs6000_expand_vector_extract (rtx, rtx, rtx);
extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx);
@@ -1843,7 +1843,7 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
/* Vector pair modes need even/odd VSX register pairs. Only allow vector
registers. */
- if (mode == OOmode)
+ if (VECTOR_PAIR_MODE (mode))
return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
/* MMA accumulator modes need FPR registers divisible by 4. */
@@ -1954,9 +1954,10 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
GPR registers, and TImode can go in any GPR as well as VSX registers (PR
57744).
- Similarly, don't allow OOmode (vector pair, restricted to even VSX
- registers) or XOmode (vector quad, restricted to FPR registers divisible
- by 4) to tie with other modes.
+ Similarly, don't allow XOmode (vector quad, restricted to FPR registers
+ divisible by 4) to tie with other modes.
+
+ Vector pair modes can tie with other vector pair modes.
Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
128-bit floating point on VSX systems ties with other vectors. */
@@ -1964,9 +1965,14 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
static bool
rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
- if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
- || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
- return mode1 == mode2;
+ if (mode1 == PTImode || mode1 == XOmode
+ || mode2 == PTImode || mode2 == XOmode)
+ return mode1 == mode2;
+
+ /* Vector pair modes need even/odd register pairs, so they only tie with
+ other vector pair modes. The test must be symmetric in MODE1 and MODE2. */
+ if (VECTOR_PAIR_MODE (mode1) || VECTOR_PAIR_MODE (mode2))
+ return VECTOR_PAIR_MODE (mode1) && VECTOR_PAIR_MODE (mode2);
if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
@@ -2715,13 +2721,13 @@ rs6000_setup_reg_addr_masks (void)
of the LXVP or STXVP instructions, do not allow indexed mode so
that we can split the load/store. */
else if ((addr_mask != 0) && TARGET_MMA
- && (m2 == OOmode || m2 == XOmode))
+ && (VECTOR_PAIR_MODE (m2) || m2 == XOmode))
{
addr_mask |= RELOAD_REG_OFFSET;
if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
{
addr_mask |= RELOAD_REG_QUAD_OFFSET;
- if (m2 == OOmode
+ if (VECTOR_PAIR_MODE (m2)
&& TARGET_LOAD_VECTOR_PAIR
&& TARGET_STORE_VECTOR_PAIR)
addr_mask |= RELOAD_REG_INDEXED;
@@ -2941,6 +2947,33 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
rs6000_vector_align[XOmode] = 512;
}
+ if (TARGET_MMA && TARGET_VECTOR_SIZE_32)
+ {
+ rs6000_vector_unit[V32QImode] = VECTOR_NONE;
+ rs6000_vector_mem[V32QImode] = VECTOR_VSX;
+ rs6000_vector_align[V32QImode] = 256;
+
+ rs6000_vector_unit[V16HImode] = VECTOR_NONE;
+ rs6000_vector_mem[V16HImode] = VECTOR_VSX;
+ rs6000_vector_align[V16HImode] = 256;
+
+ rs6000_vector_unit[V8SImode] = VECTOR_NONE;
+ rs6000_vector_mem[V8SImode] = VECTOR_VSX;
+ rs6000_vector_align[V8SImode] = 256;
+
+ rs6000_vector_unit[V8SFmode] = VECTOR_NONE;
+ rs6000_vector_mem[V8SFmode] = VECTOR_VSX;
+ rs6000_vector_align[V8SFmode] = 256;
+
+ rs6000_vector_unit[V4DImode] = VECTOR_NONE;
+ rs6000_vector_mem[V4DImode] = VECTOR_VSX;
+ rs6000_vector_align[V4DImode] = 256;
+
+ rs6000_vector_unit[V4DFmode] = VECTOR_NONE;
+ rs6000_vector_mem[V4DFmode] = VECTOR_VSX;
+ rs6000_vector_align[V4DFmode] = 256;
+ }
+
/* Register class constraints for the constraints that depend on compile
switches. When the VSX code was added, different constraints were added
based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
@@ -3072,6 +3105,22 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
}
+
+ if (TARGET_MMA && TARGET_VECTOR_SIZE_32)
+ {
+ reg_addr[V32QImode].reload_store = CODE_FOR_reload_v32qi_di_store;
+ reg_addr[V32QImode].reload_load = CODE_FOR_reload_v32qi_di_load;
+ reg_addr[V16HImode].reload_store = CODE_FOR_reload_v16hi_di_store;
+ reg_addr[V16HImode].reload_load = CODE_FOR_reload_v16hi_di_load;
+ reg_addr[V8SImode].reload_store = CODE_FOR_reload_v8si_di_store;
+ reg_addr[V8SImode].reload_load = CODE_FOR_reload_v8si_di_load;
+ reg_addr[V8SFmode].reload_store = CODE_FOR_reload_v8sf_di_store;
+ reg_addr[V8SFmode].reload_load = CODE_FOR_reload_v8sf_di_load;
+ reg_addr[V4DImode].reload_store = CODE_FOR_reload_v4di_di_store;
+ reg_addr[V4DImode].reload_load = CODE_FOR_reload_v4di_di_load;
+ reg_addr[V4DFmode].reload_store = CODE_FOR_reload_v4df_di_store;
+ reg_addr[V4DFmode].reload_load = CODE_FOR_reload_v4df_di_load;
+ }
}
}
else
@@ -3129,6 +3178,22 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
}
+
+ if (TARGET_MMA && TARGET_VECTOR_SIZE_32)
+ {
+ reg_addr[V32QImode].reload_store = CODE_FOR_reload_v32qi_si_store;
+ reg_addr[V32QImode].reload_load = CODE_FOR_reload_v32qi_si_load;
+ reg_addr[V16HImode].reload_store = CODE_FOR_reload_v16hi_si_store;
+ reg_addr[V16HImode].reload_load = CODE_FOR_reload_v16hi_si_load;
+ reg_addr[V8SImode].reload_store = CODE_FOR_reload_v8si_si_store;
+ reg_addr[V8SImode].reload_load = CODE_FOR_reload_v8si_si_load;
+ reg_addr[V8SFmode].reload_store = CODE_FOR_reload_v8sf_si_store;
+ reg_addr[V8SFmode].reload_load = CODE_FOR_reload_v8sf_si_load;
+ reg_addr[V4DImode].reload_store = CODE_FOR_reload_v4di_si_store;
+ reg_addr[V4DImode].reload_load = CODE_FOR_reload_v4di_si_load;
+ reg_addr[V4DFmode].reload_store = CODE_FOR_reload_v4df_si_store;
+ reg_addr[V4DFmode].reload_load = CODE_FOR_reload_v4df_si_load;
+ }
}
reg_addr[DFmode].scalar_in_vmx_p = true;
@@ -4429,6 +4494,15 @@ rs6000_option_override_internal (bool global_init_p)
rs6000_isa_flags &= OPTION_MASK_STORE_VECTOR_PAIR;
}
+ if (!TARGET_MMA && TARGET_VECTOR_SIZE_32)
+ {
+ if (OPTION_SET_P (TARGET_VECTOR_SIZE_32))
+ warning (0, "%qs should not be used unless you use %qs",
+ "-mvector-size-32", "-mmma");
+
+ TARGET_VECTOR_SIZE_32 = 0;
+ }
+
/* Enable power10 fusion if we are tuning for power10, even if we aren't
generating power10 instructions. */
if (!(rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION))
@@ -7275,6 +7349,142 @@ rs6000_expand_vector_init (rtx target, rtx vals)
emit_move_insn (target, mem);
}
+/* For a vector pair mode, return the equivalent vector mode or VOIDmode. */
+
+machine_mode
+vector_pair_to_vector_mode (machine_mode mode)
+{
+ machine_mode vmode;
+
+ switch (mode)
+ {
+ case E_V32QImode: vmode = V16QImode; break;
+ case E_V16HImode: vmode = V8HImode; break;
+ case E_V8SImode: vmode = V4SImode; break;
+ case E_V4DImode: vmode = V2DImode; break;
+ case E_V8SFmode: vmode = V4SFmode; break;
+ case E_V4DFmode: vmode = V2DFmode; break;
+ case E_OOmode: vmode = V1TImode; break;
+ default: vmode = VOIDmode; break;
+ }
+
+ return vmode;
+}
+
+/* Split OP, a CONST_VECTOR in a vector pair mode, into two constants that each
+ fit in a single vector register. *HIGH gets the first half of the elements,
+ *LOW the second half. Return false (both set to NULL_RTX) if we can't. */
+
+bool
+split_vector_pair_constant (rtx op, rtx *high, rtx *low)
+{
+ machine_mode vmode = vector_pair_to_vector_mode (GET_MODE (op));
+ /* VMODE is VOIDmode when OP's mode is not a vector pair mode. */
+ *high = *low = NULL_RTX;
+
+ if (!CONST_VECTOR_P (op) || vmode == VOIDmode)
+ return false;
+
+ size_t nunits = GET_MODE_NUNITS (vmode);
+ rtvec hi_vec = rtvec_alloc (nunits);
+ rtvec lo_vec = rtvec_alloc (nunits);
+
+ for (size_t i = 0; i < nunits; i++)
+ {
+ RTVEC_ELT (hi_vec, i) = CONST_VECTOR_ELT (op, i);
+ RTVEC_ELT (lo_vec, i) = CONST_VECTOR_ELT (op, i + nunits);
+ }
+
+ *high = gen_rtx_CONST_VECTOR (vmode, hi_vec);
+ *low = gen_rtx_CONST_VECTOR (vmode, lo_vec);
+ return true;
+}
+
+/* Initialize vector pair TARGET to VALS, a PARALLEL with one rtx for each
+ element of the vector pair. */
+
+void
+rs6000_expand_vector_pair_init (rtx target, rtx vals)
+{
+ machine_mode mode_vpair = GET_MODE (target);
+ machine_mode mode_vector;
+ size_t n_elts_vpair = GET_MODE_NUNITS (mode_vpair);
+ bool all_same = true;
+ rtx first = XVECEXP (vals, 0, 0);
+ rtx (*gen_splat) (rtx, rtx);
+ rtx (*gen_concat) (rtx, rtx, rtx);
+
+ switch (mode_vpair)
+ {
+ case E_V32QImode:
+ mode_vector = V16QImode;
+ gen_splat = gen_vpair_splat_v32qi;
+ gen_concat = gen_vpair_concat_v32qi;
+ break;
+
+ case E_V16HImode:
+ mode_vector = V8HImode;
+ gen_splat = gen_vpair_splat_v16hi;
+ gen_concat = gen_vpair_concat_v16hi;
+ break;
+
+ case E_V8SImode:
+ mode_vector = V4SImode;
+ gen_splat = gen_vpair_splat_v8si;
+ gen_concat = gen_vpair_concat_v8si;
+ break;
+
+ case E_V4DImode:
+ mode_vector = V2DImode;
+ gen_splat = gen_vpair_splat_v4di;
+ gen_concat = gen_vpair_concat_v4di;
+ break;
+
+ case E_V8SFmode:
+ mode_vector = V4SFmode;
+ gen_splat = gen_vpair_splat_v8sf;
+ gen_concat = gen_vpair_concat_v8sf;
+ break;
+
+ case E_V4DFmode:
+ mode_vector = V2DFmode;
+ gen_splat = gen_vpair_splat_v4df;
+ gen_concat = gen_vpair_concat_v4df;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* See if we can do a splat operation. */
+ for (size_t i = 1; i < n_elts_vpair; ++i)
+ if (!rtx_equal_p (XVECEXP (vals, 0, i), first))
+ {
+ all_same = false;
+ break;
+ }
+
+ if (all_same)
+ {
+ emit_insn (gen_splat (target, first));
+ return;
+ }
+
+ /* Split by hand: split_vector_pair_constant only accepts CONST_VECTORs. */
+ size_t n_half = n_elts_vpair / 2;
+ rtvec vec_hi = rtvec_alloc (n_half), vec_lo = rtvec_alloc (n_half);
+ for (size_t i = 0; i < n_half; i++)
+ {
+ RTVEC_ELT (vec_hi, i) = XVECEXP (vals, 0, i);
+ RTVEC_ELT (vec_lo, i) = XVECEXP (vals, 0, i + n_half);
+ }
+ rtx vector_hi = gen_reg_rtx (mode_vector);
+ rtx vector_lo = gen_reg_rtx (mode_vector);
+ rs6000_expand_vector_init (vector_hi, gen_rtx_PARALLEL (mode_vector, vec_hi));
+ rs6000_expand_vector_init (vector_lo, gen_rtx_PARALLEL (mode_vector, vec_lo));
+ emit_insn (gen_concat (target, vector_hi, vector_lo));
+}
+
/* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
is variable and also counts by vector element size for p9 and above. */
@@ -8694,6 +8904,12 @@ reg_offset_addressing_ok_p (machine_mode mode)
/* The vector pair/quad types support offset addressing if the
underlying vectors support offset addressing. */
case E_OOmode:
+ case E_V32QImode:
+ case E_V16HImode:
+ case E_V8SImode:
+ case E_V8SFmode:
+ case E_V4DImode:
+ case E_V4DFmode:
case E_XOmode:
return TARGET_MMA;
@@ -11202,6 +11418,12 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
case E_V2DFmode:
case E_V2DImode:
case E_V1TImode:
+ case E_V32QImode:
+ case E_V16HImode:
+ case E_V8SFmode:
+ case E_V8SImode:
+ case E_V4DFmode:
+ case E_V4DImode:
if (CONSTANT_P (operands[1])
&& !easy_vector_constant (operands[1], mode))
operands[1] = force_const_mem (mode, operands[1]);
@@ -13456,7 +13678,7 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
the GPR registers. */
if (rclass == GEN_OR_FLOAT_REGS)
{
- if (mode == OOmode)
+ if (VECTOR_PAIR_MODE (mode))
return VSX_REGS;
if (mode == XOmode)
@@ -23417,6 +23639,7 @@ altivec_expand_vec_perm_le (rtx operands[4])
rtx tmp = target;
rtx norreg = gen_reg_rtx (V16QImode);
machine_mode mode = GET_MODE (target);
+ machine_mode qi_vmode = VECTOR_PAIR_MODE (mode) ? V32QImode : V16QImode;
/* Get everything in regs so the pattern matches. */
if (!REG_P (op0))
@@ -23424,7 +23647,7 @@ altivec_expand_vec_perm_le (rtx operands[4])
if (!REG_P (op1))
op1 = force_reg (mode, op1);
if (!REG_P (sel))
- sel = force_reg (V16QImode, sel);
+ sel = force_reg (qi_vmode, sel);
if (!REG_P (target))
tmp = gen_reg_rtx (mode);
@@ -23437,10 +23660,10 @@ altivec_expand_vec_perm_le (rtx operands[4])
{
/* Invert the selector with a VNAND if available, else a VNOR.
The VNAND is preferred for future fusion opportunities. */
- notx = gen_rtx_NOT (V16QImode, sel);
+ notx = gen_rtx_NOT (qi_vmode, sel);
iorx = (TARGET_P8_VECTOR
- ? gen_rtx_IOR (V16QImode, notx, notx)
- : gen_rtx_AND (V16QImode, notx, notx));
+ ? gen_rtx_IOR (qi_vmode, notx, notx)
+ : gen_rtx_AND (qi_vmode, notx, notx));
emit_insn (gen_rtx_SET (norreg, iorx));
/* Permute with operands reversed and adjusted selector. */
@@ -24572,6 +24795,9 @@ static struct rs6000_opt_var const rs6000_opt_vars[] =
{ "speculate-indirect-jumps",
offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
+ { "vector-size-32",
+ offsetof (struct gcc_options, x_TARGET_VECTOR_SIZE_32),
+ offsetof (struct cl_target_option, x_TARGET_VECTOR_SIZE_32), },
};
/* Inner function to handle attribute((target("..."))) and #pragma GCC target
@@ -27426,6 +27652,8 @@ rs6000_split_multireg_move (rtx dst, rtx src)
int reg_mode_size;
/* The number of registers that will be moved. */
int nregs;
+ /* Hi/lo values for splitting vector pair constants. */
+ rtx vpair_hi, vpair_lo;
reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
mode = GET_MODE (dst);
@@ -27441,8 +27669,11 @@ rs6000_split_multireg_move (rtx dst, rtx src)
}
/* If we have a vector pair/quad mode, split it into two/four separate
vectors. */
- else if (mode == OOmode || mode == XOmode)
- reg_mode = V1TImode;
+ else if (VECTOR_PAIR_MODE (mode) || mode == XOmode)
+ {
+ machine_mode vmode = vector_pair_to_vector_mode (mode);
+ reg_mode = (vmode == VOIDmode) ? V1TImode : vmode;
+ }
else if (FP_REGNO_P (reg))
reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
(TARGET_HARD_FLOAT ? DFmode : SFmode);
@@ -27454,6 +27685,29 @@ rs6000_split_multireg_move (rtx dst, rtx src)
gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
+ /* Handle vector pair constants. */
+ if (CONST_VECTOR_P (src) && VECTOR_PAIR_MODE (mode) && TARGET_MMA
+ && split_vector_pair_constant (src, &vpair_hi, &vpair_lo)
+ && VSX_REGNO_P (reg))
+ {
+ reg_mode = GET_MODE (vpair_hi);
+ rtx reg_hi = gen_rtx_REG (reg_mode, reg);
+ rtx reg_lo = gen_rtx_REG (reg_mode, reg + 1);
+
+ emit_move_insn (reg_hi, vpair_hi);
+
+ /* 0.0 is easy. For other constants, copy the high register into the low
+ register if the two sets of constants are equal. This means we won't
+ be doing back to back prefixed load immediate instructions. */
+ if (rtx_equal_p (vpair_hi, vpair_lo)
+ && !rtx_equal_p (vpair_hi, CONST0_RTX (reg_mode)))
+ emit_move_insn (reg_lo, reg_hi);
+ else
+ emit_move_insn (reg_lo, vpair_lo);
+
+ return;
+ }
+
/* TDmode residing in FP registers is special, since the ISA requires that
the lower-numbered word of a register pair is always the most significant
word, even in little-endian mode. This does not match the usual subreg
@@ -27493,7 +27747,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
below. This means the last register gets the first memory
location. We also need to be careful of using the right register
numbers if we are splitting XO to OO. */
- if (mode == OOmode || mode == XOmode)
+ if (VECTOR_PAIR_MODE (mode) || mode == XOmode)
{
nregs = hard_regno_nregs (reg, mode);
int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
@@ -27553,7 +27807,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
gcc_assert (REG_P (dst));
if (GET_MODE (src) == XOmode)
gcc_assert (FP_REGNO_P (REGNO (dst)));
- if (GET_MODE (src) == OOmode)
+ if (VECTOR_PAIR_MODE (GET_MODE (src)))
gcc_assert (VSX_REGNO_P (REGNO (dst)));
int nvecs = XVECLEN (src, 0);
@@ -27628,7 +27882,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
overlap. */
int i;
/* XO/OO are opaque so cannot use subregs. */
- if (mode == OOmode || mode == XOmode )
+ if (VECTOR_PAIR_MODE (mode) || mode == XOmode )
{
for (i = nregs - 1; i >= 0; i--)
{
@@ -27802,7 +28056,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
continue;
/* XO/OO are opaque so cannot use subregs. */
- if (mode == OOmode || mode == XOmode )
+ if (VECTOR_PAIR_MODE (mode) || mode == XOmode )
{
rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
@@ -1006,6 +1006,12 @@ enum data_align { align_abi, align_opt, align_both };
(ALTIVEC_VECTOR_MODE (MODE) || VSX_VECTOR_MODE (MODE) \
|| (MODE) == V2DImode || (MODE) == V1TImode)
+/* Whether a mode is held in paired vector registers. */
+#define VECTOR_PAIR_MODE(MODE) \
+ ((MODE) == OOmode \
+ || (MODE) == V32QImode || (MODE) == V16HImode || (MODE) == V8SImode \
+ || (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode)
+
/* Post-reload, we can't use any new AltiVec registers, as we already
emitted the vrsave mask. */
@@ -683,9 +683,13 @@ (define_mode_attr wd [(QI "b")
(HI "h")
(SI "w")
(DI "d")
+ (V32QI "b")
(V16QI "b")
+ (V16HI "h")
(V8HI "h")
+ (V8SI "w")
(V4SI "w")
+ (V4DI "d")
(V2DI "d")
(V1TI "q")
(TI "q")])
@@ -812,7 +816,7 @@ (define_mode_attr BOOL_REGS_UNARY [(TI "r,0,0,wa,v")
;; supplement addressing modes.
(define_mode_iterator RELOAD [V16QI V8HI V4SI V2DI V4SF V2DF V1TI
SF SD SI DF DD DI TI PTI KF IF TF
- OO XO])
+ OO XO V32QI V16HI V8SI V8SF V4DI V4DF])
;; Iterate over smin, smax
(define_code_iterator fp_minmax [smin smax])
@@ -15767,6 +15771,7 @@ (define_insn "hashchk"
(include "vsx.md")
(include "altivec.md")
(include "mma.md")
+(include "vector-pair.md")
(include "dfp.md")
(include "crypto.md")
(include "htm.md")
@@ -605,6 +605,10 @@ mstore-vector-pair
Target Undocumented Mask(STORE_VECTOR_PAIR) Var(rs6000_isa_flags)
Generate (do not generate) store vector pair instructions.
+mvector-size-32
+Target Undocumented Var(TARGET_VECTOR_SIZE_32) Init(0) Save
+Generate (do not generate) vector pair instructions for vector_size(32).
+
mrelative-jumptables
Target Undocumented Var(rs6000_relative_jumptables) Init(1) Save
new file mode 100644
@@ -0,0 +1,319 @@
+;; Vector pair arithmetic and logical instruction support.
+;; Copyright (C) 2020-2023 Free Software Foundation, Inc.
+;; Contributed by Peter Bergner <bergner@linux.ibm.com> and
+;; Michael Meissner <meissner@linux.ibm.com>
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; This file adds support for doing vector operations on pairs of vector
+;; registers. Most of the instructions use vector pair instructions to load
+;; and possibly store registers, but split the operation after register
+;; allocation into 2 separate operations. The second scheduler pass can
+;; interleave other instructions between these pairs of instructions if
+;; possible.
+
+;; Iterator for all vector pair modes. Even though we do not provide integer
+;; vector pair operations at this time, we need to support loading and storing
+;; integer vector pairs for permute operations (and eventually compare).
+(define_mode_iterator VPAIR [V32QI V16HI V8SI V4DI V8SF V4DF])
+
+;; Iterator for vector pairs with double word elements
+(define_mode_iterator VPAIR_DWORD [V4DI V4DF])
+
+;; Map vector pair mode to vector mode in upper case after the vector pair is
+;; split to two vectors.
+(define_mode_attr VPAIR_VECTOR [(V32QI "V16QI")
+ (V16HI "V8HI")
+ (V8SI "V4SI")
+ (V4DI "V2DI")
+ (V8SF "V4SF")
+ (V4DF "V2DF")])
+
+;; Map vector pair mode to vector mode in lower case after the vector pair is
+;; split to two vectors.
+(define_mode_attr vpair_vector_l [(V32QI "v16qi")
+ (V16HI "v8hi")
+ (V8SI "v4si")
+ (V4DI "v2di")
+ (V8SF "v4sf")
+ (V4DF "v2df")])
+
+;; Map vector pair mode to the base element mode.
+(define_mode_attr VPAIR_ELEMENT [(V32QI "QI")
+ (V16HI "HI")
+ (V8SI "SI")
+ (V4DI "DI")
+ (V8SF "SF")
+ (V4DF "DF")])
+
+;; Map vector pair mode to the base element mode in lower case.
+(define_mode_attr vpair_element_l [(V32QI "qi")
+ (V16HI "hi")
+ (V8SI "si")
+ (V4DI "di")
+ (V8SF "sf")
+ (V4DF "df")])
+
+;; Vector pair move support.
+(define_expand "mov<mode>"
+ [(set (match_operand:VPAIR 0 "nonimmediate_operand")
+ (match_operand:VPAIR 1 "input_operand"))]
+ "TARGET_MMA && TARGET_VECTOR_SIZE_32"
+{
+ rs6000_emit_move (operands[0], operands[1], <MODE>mode);
+ DONE;
+})
+
+(define_insn_and_split "*mov<mode>"
+ [(set (match_operand:VPAIR 0 "nonimmediate_operand"
+ "=wa, wa, ZwO, QwO, wa, wa, wa")
+
+ (match_operand:VPAIR 1 "input_operand"
+ "ZwO, QwO, wa, wa, wa, j, eV"))]
+ "TARGET_MMA
+ && (gpc_reg_operand (operands[0], <MODE>mode)
+ || gpc_reg_operand (operands[1], <MODE>mode))"
+ "@
+ lxvp%X1 %x0,%1
+ #
+ stxvp%X0 %x1,%0
+ #
+ #
+ #
+ #"
+ "&& reload_completed
+ && ((MEM_P (operands[0]) && !TARGET_STORE_VECTOR_PAIR)
+ || (MEM_P (operands[1]) && !TARGET_LOAD_VECTOR_PAIR)
+ || (!MEM_P (operands[0]) && !MEM_P (operands[1])))"
+ [(const_int 0)]
+{
+ rs6000_split_multireg_move (operands[0], operands[1]);
+ DONE;
+}
+ [(set_attr "size" "256")
+ (set_attr "type" "vecload, vecload, vecstore, vecstore, veclogical,
+ vecperm, vecperm")
+ (set_attr "length" "*, 8, *, 8, 8,
+ 8, 24")
+ (set_attr "isa" "lxvp, *, stxvp, *, *,
+ *, *")])
+
+;; Vector pair initialization
+(define_expand "vec_init<mode><vpair_element_l>"
+ [(match_operand:VPAIR 0 "vsx_register_operand")
+ (match_operand:VPAIR 1 "")]
+ "TARGET_MMA && TARGET_VECTOR_SIZE_32"
+{
+ rs6000_expand_vector_pair_init (operands[0], operands[1]);
+ DONE;
+})
+
+;; Set an element in a vector pair with double word elements.
+(define_insn_and_split "vec_set<mode>"
+ [(set (match_operand:VPAIR_DWORD 0 "vsx_register_operand" "+&wa")
+ (unspec:VPAIR_DWORD
+ [(match_dup 0)
+ (match_operand:<VPAIR_ELEMENT> 1 "vsx_register_operand" "wa")
+ (match_operand 2 "const_0_to_3_operand" "n")]
+ UNSPEC_VSX_SET))
+ (clobber (match_scratch:<VPAIR_ELEMENT> 3 "=&wa"))]
+ "TARGET_MMA && TARGET_VECTOR_SIZE_32"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx dest = operands[0];
+ rtx value = operands[1];
+ HOST_WIDE_INT elt = INTVAL (operands[2]);
+ rtx tmp = operands[3];
+ machine_mode mode = <MODE>mode;
+ machine_mode vmode = <VPAIR_VECTOR>mode;
+ unsigned nelts = GET_MODE_NUNITS (<VPAIR_VECTOR>mode); /* Elts per half. */
+ unsigned reg_num = ((WORDS_BIG_ENDIAN && elt >= nelts)
+ || (!WORDS_BIG_ENDIAN && elt < nelts));
+
+ rtx vreg = simplify_gen_subreg (vmode, dest, mode, reg_num * 16);
+
+ if ((elt & 0x1) == 0)
+ {
+ emit_insn (gen_vsx_extract_<vpair_vector_l> (tmp, vreg, const1_rtx));
+ emit_insn (gen_vsx_concat_<vpair_vector_l> (vreg, value, tmp));
+ }
+ else
+ {
+ emit_insn (gen_vsx_extract_<vpair_vector_l> (tmp, vreg, const0_rtx));
+ emit_insn (gen_vsx_concat_<vpair_vector_l> (vreg, tmp, value));
+ }
+
+ DONE;
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "vecperm")])
+
+;; Extract DF/DI from V4DF/V4DI, convert it into extract from V2DF/V2DI.
+(define_insn_and_split "vec_extract<mode><vpair_element_l>"
+ [(set (match_operand:<VPAIR_ELEMENT> 0 "gpc_reg_operand" "=wa,r")
+ (vec_select:<VPAIR_ELEMENT>
+ (match_operand:VPAIR_DWORD 1 "gpc_reg_operand" "wa,wa")
+ (parallel
+ [(match_operand:QI 2 "const_0_to_3_operand" "n,n")])))]
+ "TARGET_MMA && TARGET_VECTOR_SIZE_32"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (vec_select:<VPAIR_ELEMENT>
+ (match_dup 3)
+ (parallel [(match_dup 4)])))]
+{
+ machine_mode vmode = <VPAIR_VECTOR>mode;
+ rtx op1 = operands[1];
+ HOST_WIDE_INT element = INTVAL (operands[2]);
+ unsigned reg_num = 0;
+
+ if ((WORDS_BIG_ENDIAN && element >= 2)
+ || (!WORDS_BIG_ENDIAN && element < 2))
+ reg_num++;
+
+ operands[3] = simplify_gen_subreg (vmode, op1, <MODE>mode, reg_num * 16);
+ operands[4] = GEN_INT (element & 1);
+}
+ [(set_attr "type" "vecperm,mfvsr")])
+
+;; Extract a SFmode element from V8SF
+(define_insn_and_split "vec_extractv8sfsf"
+ [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
+ (vec_select:SF
+ (match_operand:V8SF 1 "vsx_register_operand" "wa")
+ (parallel [(match_operand:QI 2 "const_0_to_7_operand" "n")])))]
+ "TARGET_MMA && TARGET_VECTOR_SIZE_32"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx tmp;
+ HOST_WIDE_INT element = INTVAL (operands[2]);
+ unsigned reg_num = 0;
+
+ if ((WORDS_BIG_ENDIAN && element >= 4)
+ || (!WORDS_BIG_ENDIAN && element < 4))
+ reg_num++;
+
+ rtx vreg = simplify_gen_subreg (V4SFmode, op1, V8SFmode, reg_num * 16);
+ HOST_WIDE_INT vreg_elt = BYTES_BIG_ENDIAN ? (element & 3) : 3 - (element & 3);
+
+ /* Rotate the element into word 0 (xxsldwi uses big endian word numbers). */
+ if (!vreg_elt)
+ tmp = vreg;
+ else
+ {
+ tmp = gen_rtx_REG (V4SFmode, reg_or_subregno (op0));
+ emit_insn (gen_vsx_xxsldwi_v4sf (tmp, vreg, vreg, GEN_INT (vreg_elt)));
+ }
+
+ /* Convert the float element to double precision. */
+ emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
+ DONE;
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "fp")])
+
+;; Assemble a vector pair from two vectors.
+;;
+;; We have both endian versions to change which input register will be moved
+;; to the first register in the vector pair.
+(define_expand "vpair_concat_<mode>"
+ [(set (match_operand:VPAIR 0 "vsx_register_operand")
+ (vec_concat:VPAIR
+ (match_operand:<VPAIR_VECTOR> 1 "input_operand")
+ (match_operand:<VPAIR_VECTOR> 2 "input_operand")))]
+ "TARGET_MMA && TARGET_VECTOR_SIZE_32")
+
+(define_insn_and_split "*vpair_concat_<mode>_be"
+ [(set (match_operand:VPAIR 0 "vsx_register_operand" "=wa,&wa")
+ (vec_concat:VPAIR
+ (match_operand:<VPAIR_VECTOR> 1 "input_operand" "0,mwajeP")
+ (match_operand:<VPAIR_VECTOR> 2 "input_operand" "mwajeP,mwajeP")))]
+ "TARGET_MMA && TARGET_VECTOR_SIZE_32 && WORDS_BIG_ENDIAN"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 3) (match_dup 1))
+ (set (match_dup 4) (match_dup 2))]
+{
+ machine_mode vmode = <VPAIR_VECTOR>mode;
+ rtx op0 = operands[0];
+ operands[3] = simplify_gen_subreg (vmode, op0, <MODE>mode, 0);
+ operands[4] = simplify_gen_subreg (vmode, op0, <MODE>mode, 16);
+}
+ [(set_attr "length" "8")])
+
+(define_insn_and_split "*vpair_concat_<mode>_le"
+ [(set (match_operand:VPAIR 0 "vsx_register_operand" "=&wa,wa")
+ (vec_concat:VPAIR
+ (match_operand:<VPAIR_VECTOR> 1 "input_operand" "mwajeP,0")
+ (match_operand:<VPAIR_VECTOR> 2 "input_operand" "mwajeP,mwajeP")))]
+ "TARGET_MMA && TARGET_VECTOR_SIZE_32 && !WORDS_BIG_ENDIAN"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 3) (match_dup 1))
+ (set (match_dup 4) (match_dup 2))]
+{
+ machine_mode vmode = <VPAIR_VECTOR>mode;
+ rtx op0 = operands[0];
+ operands[3] = simplify_gen_subreg (vmode, op0, <MODE>mode, 0);
+ operands[4] = simplify_gen_subreg (vmode, op0, <MODE>mode, 16);
+}
+ [(set_attr "length" "8")])
+
+;; Zero a vector pair
+(define_expand "vpair_zero_<mode>"
+ [(set (match_operand:VPAIR 0 "vsx_register_operand") (match_dup 1))]
+ "TARGET_MMA && TARGET_VECTOR_SIZE_32"
+{
+ operands[1] = CONST0_RTX (<MODE>mode);
+})
+
+;; Create a vector pair with a value splat'ed (duplicated) to all of the
+;; elements.
+(define_expand "vpair_splat_<mode>"
+ [(use (match_operand:VPAIR 0 "vsx_register_operand"))
+ (use (match_operand:<VPAIR_ELEMENT> 1 "input_operand"))]
+ "TARGET_MMA && TARGET_VECTOR_SIZE_32"
+{
+ machine_mode vmode = <VPAIR_VECTOR>mode;
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+
+ if (op1 == CONST0_RTX (<VPAIR_ELEMENT>mode))
+ {
+ emit_insn (gen_vpair_zero_<mode> (op0));
+ DONE;
+ }
+
+ rtx tmp = gen_reg_rtx (vmode);
+
+ unsigned num_elements = GET_MODE_NUNITS (vmode);
+ rtvec elements = rtvec_alloc (num_elements);
+ for (size_t i = 0; i < num_elements; i++)
+ RTVEC_ELT (elements, i) = copy_rtx (op1);
+
+ rtx vec_elements = gen_rtx_PARALLEL (vmode, elements);
+ rs6000_expand_vector_init (tmp, vec_elements);
+ emit_insn (gen_vpair_concat_<mode> (op0, tmp, tmp));
+ DONE;
+})
@@ -3509,6 +3509,10 @@ loaded to a VSX register with one prefixed instruction.
An IEEE 128-bit constant that can be loaded into a VSX register with
the @code{lxvkq} instruction.
+@item eV
+A vector pair constant that can be loaded into a pair of VSX registers
+with two separate instructions.
+
@ifset INTERNALS
@item G
A floating point constant that can be loaded into a register with one