@@ -292,27 +292,34 @@ (define_expand "movoo"
gcc_assert (false);
})
+;; If the user used -mno-store-vector-pair or -mno-load-vector-pair, use an
+;; alternative that does not allow indexed addresses so we can split the load
+;; or store.
(define_insn_and_split "*movoo"
- [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,ZwO,wa")
- (match_operand:OO 1 "input_operand" "ZwO,wa,wa"))]
+ [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,wa,ZwO,QwO,wa")
+ (match_operand:OO 1 "input_operand" "ZwO,QwO,wa,wa,wa"))]
"TARGET_MMA
&& (gpc_reg_operand (operands[0], OOmode)
|| gpc_reg_operand (operands[1], OOmode))"
"@
lxvp%X1 %x0,%1
+ #
stxvp%X0 %x1,%0
+ #
#"
"&& reload_completed
- && (!MEM_P (operands[0]) && !MEM_P (operands[1]))"
+ && ((MEM_P (operands[0]) && !TARGET_STORE_VECTOR_PAIR)
+ || (MEM_P (operands[1]) && !TARGET_LOAD_VECTOR_PAIR)
+ || (!MEM_P (operands[0]) && !MEM_P (operands[1])))"
[(const_int 0)]
{
rs6000_split_multireg_move (operands[0], operands[1]);
DONE;
}
- [(set_attr "type" "vecload,vecstore,veclogical")
+ [(set_attr "type" "vecload,vecload,vecstore,vecstore,veclogical")
(set_attr "size" "256")
- (set_attr "length" "*,*,8")])
-
+ (set_attr "length" "*,8,*,8,8")
+ (set_attr "isa" "lxvp,*,stxvp,*,*")])
;; Vector quad support. XOmode can only live in FPRs.
(define_expand "movxo"
@@ -77,10 +77,12 @@
/* Flags that need to be turned off if -mno-power10. */
/* We comment out PCREL_OPT here to disable it by default because SPEC2017
performance was degraded by it. */
-#define OTHER_POWER10_MASKS (OPTION_MASK_MMA \
+#define OTHER_POWER10_MASKS (OPTION_MASK_LOAD_VECTOR_PAIR \
+ | OPTION_MASK_MMA \
| OPTION_MASK_PCREL \
/* | OPTION_MASK_PCREL_OPT */ \
- | OPTION_MASK_PREFIXED)
+ | OPTION_MASK_PREFIXED \
+ | OPTION_MASK_STORE_VECTOR_PAIR)
#define ISA_3_1_MASKS_SERVER (ISA_3_0_MASKS_SERVER \
| OPTION_MASK_POWER10 \
@@ -130,6 +132,7 @@
| OPTION_MASK_FLOAT128_HW \
| OPTION_MASK_FLOAT128_KEYWORD \
| OPTION_MASK_FPRND \
+ | OPTION_MASK_LOAD_VECTOR_PAIR \
| OPTION_MASK_POWER10 \
| OPTION_MASK_P10_FUSION \
| OPTION_MASK_HTM \
@@ -156,6 +159,7 @@
| OPTION_MASK_QUAD_MEMORY_ATOMIC \
| OPTION_MASK_RECIP_PRECISION \
| OPTION_MASK_SOFT_FLOAT \
+ | OPTION_MASK_STORE_VECTOR_PAIR \
| OPTION_MASK_STRICT_ALIGN_OPTIONAL \
| OPTION_MASK_VSX)
@@ -2711,7 +2711,9 @@ rs6000_setup_reg_addr_masks (void)
/* Vector pairs can do both indexed and offset loads if the
instructions are enabled, otherwise they can only do offset loads
since it will be broken into two vector moves. Vector quads can
- only do offset loads. */
+ only do offset loads. If the user restricted generation of either
+ of the LXVP or STXVP instructions, do not allow indexed mode so
+ that we can split the load/store. */
else if ((addr_mask != 0) && TARGET_MMA
&& (m2 == OOmode || m2 == XOmode))
{
@@ -2719,7 +2721,9 @@ rs6000_setup_reg_addr_masks (void)
if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
{
addr_mask |= RELOAD_REG_QUAD_OFFSET;
- if (m2 == OOmode)
+ if (m2 == OOmode
+ && TARGET_LOAD_VECTOR_PAIR
+ && TARGET_STORE_VECTOR_PAIR)
addr_mask |= RELOAD_REG_INDEXED;
}
}
@@ -4405,6 +4409,26 @@ rs6000_option_override_internal (bool global_init_p)
rs6000_isa_flags &= ~OPTION_MASK_MMA;
}
+ /* Warn if -mload-vector-pair or -mstore-vector-pair was given explicitly
+ without MMA, and in either case turn the option off when MMA is not set. */
+ if (!TARGET_MMA && TARGET_LOAD_VECTOR_PAIR)
+ {
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_LOAD_VECTOR_PAIR) != 0)
+ warning (0, "%qs should not be used unless you use %qs",
+ "-mload-vector-pair", "-mmma");
+
+ rs6000_isa_flags &= ~OPTION_MASK_LOAD_VECTOR_PAIR;
+ }
+
+ if (!TARGET_MMA && TARGET_STORE_VECTOR_PAIR)
+ {
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_STORE_VECTOR_PAIR) != 0)
+ warning (0, "%qs should not be used unless you use %qs",
+ "-mstore-vector-pair", "-mmma");
+
+ rs6000_isa_flags &= ~OPTION_MASK_STORE_VECTOR_PAIR;
+ }
+
/* Enable power10 fusion if we are tuning for power10, even if we aren't
generating power10 instructions. */
if (!(rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION))
@@ -24437,6 +24461,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
{ "hard-dfp", OPTION_MASK_DFP, false, true },
{ "htm", OPTION_MASK_HTM, false, true },
{ "isel", OPTION_MASK_ISEL, false, true },
+ { "load-vector-pair", OPTION_MASK_LOAD_VECTOR_PAIR, false, true },
{ "mfcrf", OPTION_MASK_MFCRF, false, true },
{ "mfpgpr", 0, false, true },
{ "mma", OPTION_MASK_MMA, false, true },
@@ -24461,6 +24486,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
{ "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
{ "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
{ "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
+ { "store-vector-pair", OPTION_MASK_STORE_VECTOR_PAIR, false, true },
{ "string", 0, false, true },
{ "update", OPTION_MASK_NO_UPDATE, true , true },
{ "vsx", OPTION_MASK_VSX, false, true },
@@ -355,7 +355,7 @@ (define_attr "cpu"
(const (symbol_ref "(enum attr_cpu) rs6000_tune")))
;; The ISA we implement.
-(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9,p9v,p9kf,p9tf,p10"
+(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9,p9v,p9kf,p9tf,p10,lxvp,stxvp"
(const_string "any"))
;; Is this alternative enabled for the current CPU/ISA/etc.?
@@ -403,6 +403,14 @@ (define_attr "enabled" ""
(and (eq_attr "isa" "p10")
(match_test "TARGET_POWER10"))
(const_int 1)
+
+ (and (eq_attr "isa" "lxvp")
+ (match_test "TARGET_LOAD_VECTOR_PAIR"))
+ (const_int 1)
+
+ (and (eq_attr "isa" "stxvp")
+ (match_test "TARGET_STORE_VECTOR_PAIR"))
+ (const_int 1)
] (const_int 0)))
;; If this instruction is microcoded on the CELL processor
@@ -597,6 +597,14 @@ mmma
Target Mask(MMA) Var(rs6000_isa_flags)
Generate (do not generate) MMA instructions.
+mload-vector-pair
+Target Undocumented Mask(LOAD_VECTOR_PAIR) Var(rs6000_isa_flags)
+Generate (do not generate) load vector pair instructions.
+
+mstore-vector-pair
+Target Undocumented Mask(STORE_VECTOR_PAIR) Var(rs6000_isa_flags)
+Generate (do not generate) store vector pair instructions.
+
mrelative-jumptables
Target Undocumented Var(rs6000_relative_jumptables) Init(1) Save
new file mode 100644
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test if we can control generating load and store vector pair via the target
+ attribute. */
+
+__attribute__((__target__("load-vector-pair,store-vector-pair")))
+void
+test_load_store (__vector_pair *p, __vector_pair *q)
+{
+ *p = *q; /* 1 lxvp, 1 stxvp. */
+}
+
+__attribute__((__target__("load-vector-pair,no-store-vector-pair")))
+void
+test_load_no_store (__vector_pair *p, __vector_pair *q)
+{
+ *p = *q; /* 1 lxvp, 2 stxv. */
+}
+
+__attribute__((__target__("no-load-vector-pair,store-vector-pair")))
+void
+test_store_no_load (__vector_pair *p, __vector_pair *q)
+{
+ *p = *q; /* 2 lxv, 1 stxvp. */
+}
+
+__attribute__((__target__("no-load-vector-pair,no-store-vector-pair")))
+void
+test_no_load_or_store (__vector_pair *p, __vector_pair *q)
+{
+ *p = *q; /* 2 lxv, 2 stxv. */
+}
+
+/* { dg-final { scan-assembler-times {\mp?lxvpx?\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mp?stxvpx?\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mp?lxvx?\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mp?stxvx?\M} 4 } } */
new file mode 100644
@@ -0,0 +1,55 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test if we can control generating load and store vector pair via the #pragma
+ directive. */
+
+#pragma GCC push_options
+#pragma GCC target("load-vector-pair,store-vector-pair")
+
+void
+test_load_store (__vector_pair *p, __vector_pair *q)
+{
+ *p = *q; /* 1 lxvp, 1 stxvp. */
+}
+
+#pragma GCC pop_options
+
+#pragma GCC push_options
+#pragma GCC target("load-vector-pair,no-store-vector-pair")
+
+void
+test_load_no_store (__vector_pair *p, __vector_pair *q)
+{
+ *p = *q; /* 1 lxvp, 2 stxv. */
+}
+
+#pragma GCC pop_options
+
+#pragma GCC push_options
+#pragma GCC target("no-load-vector-pair,store-vector-pair")
+
+void
+test_store_no_load (__vector_pair *p, __vector_pair *q)
+{
+ *p = *q; /* 2 lxv, 1 stxvp. */
+}
+
+#pragma GCC pop_options
+
+#pragma GCC push_options
+#pragma GCC target("no-load-vector-pair,no-store-vector-pair")
+
+void
+test_no_load_or_store (__vector_pair *p, __vector_pair *q)
+{
+ *p = *q; /* 2 lxv, 2 stxv. */
+}
+
+#pragma GCC pop_options
+
+/* { dg-final { scan-assembler-times {\mp?lxvpx?\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mp?stxvpx?\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mp?lxvx?\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mp?stxvx?\M} 4 } } */
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test if we generate load and store vector pair by default on power 10. */
+
+void
+test (__vector_pair *p, __vector_pair *q)
+{
+ *p = *q; /* 1 lxvp, 1 stxvp. */
+}
+
+/* { dg-final { scan-assembler-times {\mp?lxvpx?\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mp?stxvpx?\M} 1 } } */
+/* { dg-final { scan-assembler-not {\mp?lxvx?\M} } } */
+/* { dg-final { scan-assembler-not {\mp?stxvx?\M} } } */
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -mno-store-vector-pair" } */
+
+/* Test if we generate load vector pair but not store vector pair if
+ -mno-store-vector-pair is used on power10. */
+
+void
+test (__vector_pair *p, __vector_pair *q)
+{
+ *p = *q; /* 1 lxvp, 2 stxv. */
+}
+
+/* { dg-final { scan-assembler-times {\mp?lxvpx?\M} 1 } } */
+/* { dg-final { scan-assembler-not {\mp?stxvpx?\M} } } */
+/* { dg-final { scan-assembler-not {\mp?lxvx?\M} } } */
+/* { dg-final { scan-assembler-times {\mp?stxvx?\M} 2 } } */
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -mno-load-vector-pair" } */
+
+/* Test if we do not generate load vector pair but generate store vector pair
+ if -mno-load-vector-pair is used on power10. */
+
+void
+test (__vector_pair *p, __vector_pair *q)
+{
+ *p = *q; /* 2 lxv, 1 stxvp. */
+}
+
+/* { dg-final { scan-assembler-not {\mp?lxvpx?\M} } } */
+/* { dg-final { scan-assembler-times {\mp?stxvpx?\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mp?lxvx?\M} 2 } } */
+/* { dg-final { scan-assembler-not {\mp?stxvx?\M} } } */
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -mno-load-vector-pair -mno-store-vector-pair" } */
+
+/* Test if we generate neither load nor store vector pair instructions when
+ both are disabled on power10. */
+
+void
+test (__vector_pair *p, __vector_pair *q)
+{
+ *p = *q; /* 2 lxv, 2 stxv. */
+}
+
+/* { dg-final { scan-assembler-not {\mp?lxvpx?\M} } } */
+/* { dg-final { scan-assembler-not {\mp?stxvpx?\M} } } */
+/* { dg-final { scan-assembler-times {\mp?lxvx?\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mp?stxvx?\M} 2 } } */