@@ -1301,7 +1301,7 @@ (define_predicate "splat_input_operand"
;; Return 1 if this operand is valid for a MMA assemble accumulator insn.
(define_special_predicate "mma_assemble_input_operand"
- (match_test "(mode == V16QImode
+ (match_test "(VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16
&& (vsx_register_operand (op, mode)
|| (MEM_P (op)
&& (indexed_or_indirect_address (XEXP (op, 0), mode)
@@ -4132,6 +4132,11 @@
void __builtin_vsx_stxvp (v256, unsigned long, const v256 *);
STXVP nothing {mma,pair}
+;; General vector pair built-in functions
+
+ v256 __builtin_vpair_zero ();
+ VPAIR_ZERO vpair_zero {mma}
+
;; vector pair built-in functions for 8 32-bit float values
v256 __builtin_vpair_f32_abs (v256);
@@ -4140,6 +4145,12 @@
v256 __builtin_vpair_f32_add (v256, v256);
VPAIR_F32_ADD vpair_add_v8sf3 {mma,pair}
+ v256 __builtin_vpair_f32_assemble (vf, vf);
+ VPAIR_F32_ASSEMBLE vpair_assemble_v8sf {mma,pair}
+
+ vf __builtin_vpair_f32_extract_vector (v256, const int<1>);
+ VPAIR_F32_EXTRACT_VECTOR vpair_extract_vector_v8sf {mma,pair}
+
v256 __builtin_vpair_f32_fma (v256, v256, v256);
VPAIR_F32_FMA vpair_fma_v8sf4 {mma,pair}
@@ -4155,6 +4166,9 @@
v256 __builtin_vpair_f32_neg (v256);
VPAIR_F32_NEG vpair_neg_v8sf2 {mma,pair}
+ v256 __builtin_vpair_f32_splat (float);
+ VPAIR_F32_SPLAT vpair_splat_v8sf {mma,pair}
+
v256 __builtin_vpair_f32_sub (v256, v256);
VPAIR_F32_SUB vpair_sub_v8sf3 {mma,pair}
@@ -4166,6 +4180,12 @@
v256 __builtin_vpair_f64_add (v256, v256);
VPAIR_F64_ADD vpair_add_v4df3 {mma,pair}
+ v256 __builtin_vpair_f64_assemble (vd, vd);
+ VPAIR_F64_ASSEMBLE vpair_assemble_v4df {mma,pair}
+
+ vd __builtin_vpair_f64_extract_vector (v256, const int<1>);
+ VPAIR_F64_EXTRACT_VECTOR vpair_extract_vector_v4df {mma,pair}
+
v256 __builtin_vpair_f64_fma (v256, v256, v256);
VPAIR_F64_FMA vpair_fma_v4df4 {mma,pair}
@@ -4181,6 +4201,9 @@
v256 __builtin_vpair_f64_neg (v256);
VPAIR_F64_NEG vpair_neg_v4df2 {mma,pair}
+ v256 __builtin_vpair_f64_splat (double);
+ VPAIR_F64_SPLAT vpair_splat_v4df {mma,pair}
+
v256 __builtin_vpair_f64_sub (v256, v256);
VPAIR_F64_SUB vpair_sub_v4df3 {mma,pair}
@@ -4193,6 +4216,12 @@
v256 __builtin_vpair_i8_and (v256, v256);
VPAIR_I8_AND vpair_and_v32qi3 {mma,pair}
+ v256 __builtin_vpair_i8_assemble (vsc, vsc);
+ VPAIR_I8_ASSEMBLE vpair_assemble_v32qi {mma,pair}
+
+ vsc __builtin_vpair_i8_extract_vector (v256, const int<1>);
+ VPAIR_I8_EXTRACT_VECTOR vpair_extract_vector_v32qi {mma,pair}
+
v256 __builtin_vpair_i8_ior (v256, v256);
VPAIR_I8_IOR vpair_ior_v32qi3 {mma,pair}
@@ -4208,18 +4237,30 @@
v256 __builtin_vpair_i8_not (v256);
VPAIR_I8_NOT vpair_not_v32qi2 {mma,pair}
+ v256 __builtin_vpair_i8_splat (signed char);
+ VPAIR_I8_SPLAT vpair_splat_v32qi {mma,pair}
+
v256 __builtin_vpair_i8_sub (v256, v256);
VPAIR_I8_SUB vpair_sub_v32qi3 {mma,pair}
v256 __builtin_vpair_i8_xor (v256, v256);
VPAIR_I8_XOR vpair_xor_v32qi3 {mma,pair}
+ v256 __builtin_vpair_i8u_assemble (vuc, vuc);
+ VPAIR_I8U_ASSEMBLE vpair_assemble_v32qi {mma,pair}
+
+ vuc __builtin_vpair_i8u_extract_vector (v256, const int<1>);
+ VPAIR_I8U_EXTRACT_VECTOR vpair_extract_vector_v32qi {mma,pair}
+
v256 __builtin_vpair_i8u_max (v256, v256);
VPAIR_I8U_MAX vpair_umax_v32qi3 {mma,pair}
v256 __builtin_vpair_i8u_min (v256, v256);
VPAIR_I8U_MIN vpair_umin_v32qi3 {mma,pair}
+ v256 __builtin_vpair_i8u_splat (unsigned char);
+ VPAIR_I8U_SPLAT vpair_splat_v32qi {mma,pair}
+
;; vector pair built-in functions for 16 16-bit unsigned short or
;; signed short values
@@ -4229,6 +4270,12 @@
v256 __builtin_vpair_i16_and (v256, v256);
VPAIR_I16_AND vpair_and_v16hi3 {mma,pair}
+ v256 __builtin_vpair_i16_assemble (vss, vss);
+ VPAIR_I16_ASSEMBLE vpair_assemble_v16hi {mma,pair}
+
+ vss __builtin_vpair_i16_extract_vector (v256, const int<1>);
+ VPAIR_I16_EXTRACT_VECTOR vpair_extract_vector_v16hi {mma,pair}
+
v256 __builtin_vpair_i16_ior (v256, v256);
VPAIR_I16_IOR vpair_ior_v16hi3 {mma,pair}
@@ -4244,18 +4291,30 @@
v256 __builtin_vpair_i16_not (v256);
VPAIR_I16_NOT vpair_not_v16hi2 {mma,pair}
+ v256 __builtin_vpair_i16_splat (short);
+ VPAIR_I16_SPLAT vpair_splat_v16hi {mma,pair}
+
v256 __builtin_vpair_i16_sub (v256, v256);
VPAIR_I16_SUB vpair_sub_v16hi3 {mma,pair}
v256 __builtin_vpair_i16_xor (v256, v256);
VPAIR_I16_XOR vpair_xor_v16hi3 {mma,pair}
+ v256 __builtin_vpair_i16u_assemble (vus, vus);
+ VPAIR_I16U_ASSEMBLE vpair_assemble_v16hi {mma,pair}
+
+ vus __builtin_vpair_i16u_extract_vector (v256, const int<1>);
+ VPAIR_I16U_EXTRACT_VECTOR vpair_extract_vector_v16hi {mma,pair}
+
v256 __builtin_vpair_i16u_max (v256, v256);
VPAIR_I16U_MAX vpair_umax_v16hi3 {mma,pair}
v256 __builtin_vpair_i16u_min (v256, v256);
VPAIR_I16U_MIN vpair_umin_v16hi3 {mma,pair}
+ v256 __builtin_vpair_i16u_splat (unsigned short);
+ VPAIR_I16U_SPLAT vpair_splat_v16hi {mma,pair}
+
;; vector pair built-in functions for 8 32-bit unsigned int or
;; signed int values
@@ -4265,6 +4324,12 @@
v256 __builtin_vpair_i32_and (v256, v256);
VPAIR_I32_AND vpair_and_v8si3 {mma,pair}
+ v256 __builtin_vpair_i32_assemble (vsi, vsi);
+ VPAIR_I32_ASSEMBLE vpair_assemble_v8si {mma,pair}
+
+ vsi __builtin_vpair_i32_extract_vector (v256, const int<1>);
+ VPAIR_I32_EXTRACT_VECTOR vpair_extract_vector_v8si {mma,pair}
+
v256 __builtin_vpair_i32_ior (v256, v256);
VPAIR_I32_IOR vpair_ior_v8si3 {mma,pair}
@@ -4280,18 +4345,30 @@
v256 __builtin_vpair_i32_not (v256);
VPAIR_I32_NOT vpair_not_v8si2 {mma,pair}
+ v256 __builtin_vpair_i32_splat (int);
+ VPAIR_I32_SPLAT vpair_splat_v8si {mma,pair}
+
v256 __builtin_vpair_i32_sub (v256, v256);
VPAIR_I32_SUB vpair_sub_v8si3 {mma,pair}
v256 __builtin_vpair_i32_xor (v256, v256);
VPAIR_I32_XOR vpair_xor_v8si3 {mma,pair}
+ v256 __builtin_vpair_i32u_assemble (vui, vui);
+ VPAIR_I32U_ASSEMBLE vpair_assemble_v8si {mma,pair}
+
+ vui __builtin_vpair_i32u_extract_vector (v256, const int<1>);
+ VPAIR_I32U_EXTRACT_VECTOR vpair_extract_vector_v8si {mma,pair}
+
v256 __builtin_vpair_i32u_max (v256, v256);
VPAIR_I32U_MAX vpair_umax_v8si3 {mma,pair}
v256 __builtin_vpair_i32u_min (v256, v256);
VPAIR_I32U_MIN vpair_umin_v8si3 {mma,pair}
+ v256 __builtin_vpair_i32u_splat (unsigned int);
+ VPAIR_I32U_SPLAT vpair_splat_v8si {mma,pair}
+
;; vector pair built-in functions for 4 64-bit unsigned long long or
;; signed long long values
@@ -4301,6 +4378,12 @@
v256 __builtin_vpair_i64_and (v256, v256);
VPAIR_I64_AND vpair_and_v4di3 {mma,pair}
+ v256 __builtin_vpair_i64_assemble (vsll, vsll);
+ VPAIR_I64_ASSEMBLE vpair_assemble_v4di {mma,pair}
+
+ vsll __builtin_vpair_i64_extract_vector (v256, const int<1>);
+ VPAIR_I64_EXTRACT_VECTOR vpair_extract_vector_v4di {mma,pair}
+
v256 __builtin_vpair_i64_ior (v256, v256);
VPAIR_I64_IOR vpair_ior_v4di3 {mma,pair}
@@ -4316,14 +4399,26 @@
v256 __builtin_vpair_i64_not (v256);
VPAIR_I64_NOT vpair_not_v4di2 {mma,pair}
+ v256 __builtin_vpair_i64_splat (long long);
+ VPAIR_I64_SPLAT vpair_splat_v4di {mma,pair}
+
v256 __builtin_vpair_i64_sub (v256, v256);
VPAIR_I64_SUB vpair_sub_v4di3 {mma,pair}
v256 __builtin_vpair_i64_xor (v256, v256);
VPAIR_I64_XOR vpair_xor_v4di3 {mma,pair}
+ v256 __builtin_vpair_i64u_assemble (vull, vull);
+ VPAIR_I64U_ASSEMBLE vpair_assemble_v4di {mma,pair}
+
+ vull __builtin_vpair_i64u_extract_vector (v256, const int<1>);
+ VPAIR_I64U_EXTRACT_VECTOR vpair_extract_vector_v4di {mma,pair}
+
v256 __builtin_vpair_i64u_max (v256, v256);
VPAIR_I64U_MAX vpair_umax_v4di3 {mma,pair}
v256 __builtin_vpair_i64u_min (v256, v256);
VPAIR_I64U_MIN vpair_umin_v4di3 {mma,pair}
+
+ v256 __builtin_vpair_i64u_splat (unsigned long long);
+ VPAIR_I64U_SPLAT vpair_splat_v4di {mma,pair}
@@ -33,6 +33,8 @@ (define_c_enum "unspec"
UNSPEC_VPAIR_V16HI
UNSPEC_VPAIR_V8SI
UNSPEC_VPAIR_V4DI
+ UNSPEC_VPAIR_ZERO
+ UNSPEC_VPAIR_SPLAT
])
;; Iterator doing unary/binary arithmetic on vector pairs
@@ -93,6 +95,13 @@ (define_int_iterator VP_INT [UNSPEC_VPAIR_V4DI
UNSPEC_VPAIR_V16HI
UNSPEC_VPAIR_V32QI])
+(define_int_iterator VP_ALL [UNSPEC_VPAIR_V4DF
+ UNSPEC_VPAIR_V8SF
+ UNSPEC_VPAIR_V4DI
+ UNSPEC_VPAIR_V8SI
+ UNSPEC_VPAIR_V16HI
+ UNSPEC_VPAIR_V32QI])
+
;; Map VP_* to vector mode of the arguments after they are split
(define_int_attr VP_VEC_MODE [(UNSPEC_VPAIR_V4DF "V2DF")
(UNSPEC_VPAIR_V8SF "V4SF")
@@ -126,6 +135,182 @@ (define_int_attr vp_neg_reg [(UNSPEC_VPAIR_V32QI "&v")
(UNSPEC_VPAIR_V8SI "X")
(UNSPEC_VPAIR_V4DI "X")])
+;; Modes of the vector element to splat to a vector pair
+(define_mode_iterator VP_SPLAT [DF SF DI SI HI QI])
+
+;; Modes of the vector to splat to a vector pair
+(define_mode_iterator VP_SPLAT_VEC [V2DF V4SF V2DI V4SI V8HI V16QI])
+
+;; MAP VP_SPLAT and VP_SPLAT_VEC to the mode of the vector pair operation
+(define_mode_attr vp_splat_pmode [(DF "v4df")
+ (V2DF "v4df")
+ (SF "v8sf")
+ (V4SF "v8sf")
+ (DI "v4di")
+ (V2DI "v4di")
+ (SI "v8si")
+ (V4SI "v8si")
+ (HI "v16hi")
+ (V8HI "v16hi")
+ (QI "v32qi")
+ (V16QI "v32qi")])
+
+;; MAP VP_SPLAT to the mode of the vector containing the element
+(define_mode_attr VP_SPLAT_VMODE [(DF "V2DF")
+ (SF "V4SF")
+ (DI "V2DI")
+ (SI "V4SI")
+ (HI "V8HI")
+ (QI "V16QI")])
+
+;; Initialize a vector pair to 0
+(define_insn_and_split "vpair_zero"
+ [(set (match_operand:OO 0 "vsx_register_operand" "=wa")
+ (unspec:OO [(const_int 0)] UNSPEC_VPAIR_ZERO))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 1) (match_dup 3))
+ (set (match_dup 2) (match_dup 3))]
+{
+ rtx op0 = operands[0];
+ unsigned offset_hi = (WORDS_BIG_ENDIAN) ? 0 : 16;
+ unsigned offset_lo = (WORDS_BIG_ENDIAN) ? 16 : 0;
+
+ operands[1] = simplify_gen_subreg (V2DImode, op0, OOmode, offset_hi);
+ operands[2] = simplify_gen_subreg (V2DImode, op0, OOmode, offset_lo);
+ operands[3] = CONST0_RTX (V2DImode);
+}
+ [(set_attr "length" "8")])
+
+;; Assemble a vector pair from two vectors. Unlike
+;; __builtin_mma_assemble_pair, this function produces a vector pair output
+;; directly and it takes all of the vector types.
+;;
+;; We cannot update the two output registers atomically, so mark the output as
+;; an early clobber so we don't accidentally clobber the input operands.
+
+(define_insn_and_split "vpair_assemble_<vp_pmode>"
+ [(set (match_operand:OO 0 "vsx_register_operand" "=&wa")
+ (unspec:OO
+ [(match_operand:<VP_VEC_MODE> 1 "mma_assemble_input_operand" "mwa")
+ (match_operand:<VP_VEC_MODE> 2 "mma_assemble_input_operand" "mwa")]
+ VP_ALL))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx src = gen_rtx_UNSPEC (OOmode,
+ gen_rtvec (2, operands[1], operands[2]),
+ UNSPEC_VSX_ASSEMBLE);
+ rs6000_split_multireg_move (operands[0], src);
+ DONE;
+}
+ [(set_attr "length" "8")])
+
+;; Extract one of the two 128-bit vectors from a vector pair.
+(define_insn_and_split "vpair_extract_vector_<vp_pmode>"
+ [(set (match_operand:<VP_VEC_MODE> 0 "vsx_register_operand" "=wa")
+ (unspec:<VP_VEC_MODE>
+ [(match_operand:OO 1 "vsx_register_operand" "wa")
+ (match_operand 2 "const_0_to_1_operand" "n")]
+ VP_ALL))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (match_dup 3))]
+{
+ machine_mode vmode = <VP_VEC_MODE>mode;
+ unsigned reg_num = UINTVAL (operands[2]);
+ if (!WORDS_BIG_ENDIAN)
+ reg_num = 1 - reg_num;
+
+ operands[3] = simplify_gen_subreg (vmode, operands[1], OOmode, reg_num * 16);
+})
+
+;; Optimize extracting a 128-bit vector from a vector pair in memory.
+(define_insn_and_split "*vpair_extract_vector_<vp_pmode>_mem"
+ [(set (match_operand:<VP_VEC_MODE> 0 "vsx_register_operand" "=wa")
+ (unspec:<VP_VEC_MODE>
+ [(match_operand:OO 1 "memory_operand" "o")
+ (match_operand 2 "const_0_to_1_operand" "n")]
+ VP_ALL))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (match_dup 3))]
+{
+ operands[3] = adjust_address (operands[1], <VP_VEC_MODE>mode,
+ 16 * INTVAL (operands[2]));
+}
+ [(set_attr "type" "vecload")])
+
+;; Create a vector pair with a value splat'ed (duplicated) to all of the
+;; elements.
+(define_expand "vpair_splat_<vp_splat_pmode>"
+ [(use (match_operand:OO 0 "vsx_register_operand"))
+ (use (match_operand:VP_SPLAT 1 "input_operand"))]
+ "TARGET_MMA"
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ machine_mode element_mode = <MODE>mode;
+ machine_mode vector_mode = <VP_SPLAT_VMODE>mode;
+
+ if (op1 == CONST0_RTX (element_mode))
+ {
+ emit_insn (gen_vpair_zero (op0));
+ DONE;
+ }
+
+ rtx vec = gen_reg_rtx (vector_mode);
+ unsigned num_elements = GET_MODE_NUNITS (vector_mode);
+ rtvec elements = rtvec_alloc (num_elements);
+ for (size_t i = 0; i < num_elements; i++)
+ RTVEC_ELT (elements, i) = copy_rtx (op1);
+
+ rs6000_expand_vector_init (vec, gen_rtx_PARALLEL (vector_mode, elements));
+ emit_insn (gen_vpair_splat_<vp_splat_pmode>_internal (op0, vec));
+ DONE;
+})
+
+;; Inner splat support. Operand1 is the vector splat created above. Allow
+;; operand 1 to overlap with the output registers to eliminate one move
+;; instruction.
+(define_insn_and_split "vpair_splat_<vp_splat_pmode>_internal"
+ [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+ (unspec:OO
+ [(match_operand:VP_SPLAT_VEC 1 "vsx_register_operand" "0,wa")]
+ UNSPEC_VPAIR_SPLAT))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op0_vector0 = simplify_gen_subreg (<MODE>mode, op0, OOmode, 0);
+ rtx op0_vector1 = simplify_gen_subreg (<MODE>mode, op0, OOmode, 16);
+
+ /* Check if the input is one of the output registers. */
+ if (rtx_equal_p (op0_vector0, op1))
+ emit_move_insn (op0_vector1, op1);
+
+ else if (rtx_equal_p (op0_vector1, op1))
+ emit_move_insn (op0_vector0, op1);
+
+ else
+ {
+ emit_move_insn (op0_vector0, op1);
+ emit_move_insn (op0_vector1, op1);
+ }
+
+ DONE;
+}
+ [(set_attr "length" "*,8")
+ (set_attr "type" "vecmove")])
+
;; Vector pair floating point unary operations
(define_insn_and_split "vpair_<vp_insn>_<vp_pmode>2"
@@ -21386,17 +21386,27 @@ two 128-bit vectors stored in the vector pair. The
@code{__vector_pair} type is usually stored with a single vector pair
store instruction.
+The following built-in functions are independent of the type of the
+underlying vector:
+
+@smallexample
+__vector_pair __builtin_vpair_zero ();
+@end smallexample
+
The following built-in functions operate on pairs of
@code{vector float} values:
@smallexample
__vector_pair __builtin_vpair_f32_abs (__vector_pair);
__vector_pair __builtin_vpair_f32_add (__vector_pair, __vector_pair);
+__vector_pair __builtin_vpair_f32_assemble (vector float, vector float);
+vector float __builtin_vpair_f32_extract_vector (__vector_pair, int);
__vector_pair __builtin_vpair_f32_fma (__vector_pair, __vector_pair, __vector_pair);
__vector_pair __builtin_vpair_f32_max (__vector_pair, __vector_pair);
__vector_pair __builtin_vpair_f32_min (__vector_pair, __vector_pair);
__vector_pair __builtin_vpair_f32_mul (__vector_pair, __vector_pair);
__vector_pair __builtin_vpair_f32_neg (__vector_pair);
+__vector_pair __builtin_vpair_f32_splat (float);
__vector_pair __builtin_vpair_f32_sub (__vector_pair, __vector_pair);
@end smallexample
@@ -21406,11 +21416,14 @@ The following built-in functions operate on pairs of
@smallexample
__vector_pair __builtin_vpair_f64_abs (__vector_pair);
__vector_pair __builtin_vpair_f64_add (__vector_pair, __vector_pair);
+__vector_pair __builtin_vpair_f64_assemble (vector double, vector double);
+vector double __builtin_vpair_f64_extract_vector (__vector_pair, int);
__vector_pair __builtin_vpair_f64_fma (__vector_pair, __vector_pair, __vector_pair);
__vector_pair __builtin_vpair_f64_mul (__vector_pair, __vector_pair);
__vector_pair __builtin_vpair_f64_neg (__vector_pair);
__vector_pair __builtin_vpair_f64_max (__vector_pair, __vector_pair);
__vector_pair __builtin_vpair_f64_min (__vector_pair, __vector_pair);
+__vector_pair __builtin_vpair_f64_splat (double);
__vector_pair __builtin_vpair_f64_sub (__vector_pair, __vector_pair);
@end smallexample
@@ -21420,16 +21433,24 @@ The following built-in functions operate on pairs of
@smallexample
__vector_pair __builtin_vpair_i64_add (__vector_pair, __vector_pair);
__vector_pair __builtin_vpair_i64_and (__vector_pair, __vector_pair);
+__vector_pair __builtin_vpair_i64_assemble (vector long long,
+ vector long long);
+vector long long __builtin_vpair_i64_extract_vector (__vector_pair, int);
__vector_pair __builtin_vpair_i64_ior (__vector_pair, __vector_pair);
__vector_pair __builtin_vpair_i64_max (__vector_pair, __vector_pair);
__vector_pair __builtin_vpair_i64_min (__vector_pair, __vector_pair);
__vector_pair __builtin_vpair_i64_neg (__vector_pair);
__vector_pair __builtin_vpair_i64_not (__vector_pair);
+__vector_pair __builtin_vpair_i64_splat (long long);
__vector_pair __builtin_vpair_i64_sub (__vector_pair, __vector_pair);
__vector_pair __builtin_vpair_i64_xor (__vector_pair, __vector_pair);
+__vector_pair __builtin_vpair_i64u_assemble (vector unsigned long long,
+ vector unsigned long long);
+vector unsigned long long __builtin_vpair_i64u_extract_vector (__vector_pair, int);
__vector_pair __builtin_vpair_i64u_max (__vector_pair, __vector_pair);
__vector_pair __builtin_vpair_i64u_min (__vector_pair, __vector_pair);
+__vector_pair __builtin_vpair_i64u_splat (unsigned long long);
@end smallexample
The following built-in functions operate on pairs of
@@ -21438,16 +21459,23 @@ The following built-in functions operate on pairs of
@smallexample
__vector_pair __builtin_vpair_i32_add (__vector_pair, __vector_pair);
__vector_pair __builtin_vpair_i32_and (__vector_pair, __vector_pair);
+__vector_pair __builtin_vpair_i32_assemble (vector int, vector int);
+vector int __builtin_vpair_i32_extract_vector (__vector_pair, int);
__vector_pair __builtin_vpair_i32_ior (__vector_pair, __vector_pair);
__vector_pair __builtin_vpair_i32_neg (__vector_pair);
__vector_pair __builtin_vpair_i32_not (__vector_pair);
__vector_pair __builtin_vpair_i32_max (__vector_pair, __vector_pair);
__vector_pair __builtin_vpair_i32_min (__vector_pair, __vector_pair);
+__vector_pair __builtin_vpair_i32_splat (int);
__vector_pair __builtin_vpair_i32_sub (__vector_pair, __vector_pair);
__vector_pair __builtin_vpair_i32_xor (__vector_pair, __vector_pair);
+__vector_pair __builtin_vpair_i32u_assemble (vector unsigned int,
+ vector unsigned int);
+vector unsigned int __builtin_vpair_i32u_extract_vector (__vector_pair, int);
__vector_pair __builtin_vpair_i32u_max (__vector_pair, __vector_pair);
__vector_pair __builtin_vpair_i32u_min (__vector_pair, __vector_pair);
+__vector_pair __builtin_vpair_i32u_splat (unsigned int);
@end smallexample
The following built-in functions operate on pairs of
@@ -21456,6 +21484,10 @@ The following built-in functions operate on pairs of
@smallexample
__vector_pair __builtin_vpair_i16_add (__vector_pair, __vector_pair);
__vector_pair __builtin_vpair_i16_and (__vector_pair, __vector_pair);
+__vector_pair __builtin_vpair_i16_assemble (vector short,
+ vector short);
+__vector_pair __builtin_vpair_i16_splat (short);
+vector short __builtin_vpair_i16_extract_vector (__vector_pair, int);
__vector_pair __builtin_vpair_i16_ior (__vector_pair, __vector_pair);
__vector_pair __builtin_vpair_i16_max (__vector_pair, __vector_pair);
__vector_pair __builtin_vpair_i16_min (__vector_pair, __vector_pair);
@@ -21464,6 +21496,10 @@ __vector_pair __builtin_vpair_i16_not (__vector_pair);
__vector_pair __builtin_vpair_i16_sub (__vector_pair, __vector_pair);
__vector_pair __builtin_vpair_i16_xor (__vector_pair, __vector_pair);
+__vector_pair __builtin_vpair_i16u_assemble (vector unsigned short,
+ vector unsigned short);
+vector unsigned short __builtin_vpair_i16u_extract_vector (__vector_pair, int);
+__vector_pair __builtin_vpair_i16u_splat (unsigned short);
__vector_pair __builtin_vpair_i16u_max (__vector_pair, __vector_pair);
__vector_pair __builtin_vpair_i16u_min (__vector_pair, __vector_pair);
@end smallexample
@@ -21474,6 +21510,10 @@ The following built-in functions operate on pairs of
@smallexample
__vector_pair __builtin_vpair_i8_add (__vector_pair, __vector_pair);
__vector_pair __builtin_vpair_i8_and (__vector_pair, __vector_pair);
+__vector_pair __builtin_vpair_i8_assemble (vector signed char,
+ vector signed char);
+vector signed char __builtin_vpair_i8_extract_vector (__vector_pair, int);
+__vector_pair __builtin_vpair_i8_splat (signed char);
__vector_pair __builtin_vpair_i8_ior (__vector_pair, __vector_pair);
__vector_pair __builtin_vpair_i8_max (__vector_pair, __vector_pair);
__vector_pair __builtin_vpair_i8_min (__vector_pair, __vector_pair);
@@ -21482,8 +21522,12 @@ __vector_pair __builtin_vpair_i8_not (__vector_pair);
__vector_pair __builtin_vpair_i8_sub (__vector_pair, __vector_pair);
__vector_pair __builtin_vpair_i8_xor (__vector_pair, __vector_pair);
+__vector_pair __builtin_vpair_i8u_assemble (vector unsigned char,
+                                            vector unsigned char);
+vector unsigned char __builtin_vpair_i8u_extract_vector (__vector_pair, int);
__vector_pair __builtin_vpair_i8u_max (__vector_pair, __vector_pair);
__vector_pair __builtin_vpair_i8u_min (__vector_pair, __vector_pair);
+__vector_pair __builtin_vpair_i8u_splat (unsigned char);
@end smallexample
@node PowerPC Hardware Transactional Memory Built-in Functions
new file mode 100644
@@ -0,0 +1,86 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test the vector pair built-in functions for creation and extraction of
+ vector pair operations using 32-bit floats. */
+
+void
+test_f32_splat_0 (__vector_pair *p)
+{
+ /* 2 xxspltib, 1 stxvp. */
+ *p = __builtin_vpair_f32_splat (0.0f);
+}
+
+void
+test_f32_splat_1 (__vector_pair *p)
+{
+ /* 1 xxspltiw, 1 xxlor, 1 stxvp. */
+ *p = __builtin_vpair_f32_splat (1.0f);
+}
+
+void
+test_f32_splat_var (__vector_pair *p,
+ float f)
+{
+ /* 1 xscvdpspn, 1 xxspltw, 1 xxlor, 1 stxvp. */
+ *p = __builtin_vpair_f32_splat (f);
+}
+
+void
+test_f32_splat_mem (__vector_pair *p,
+ float *q)
+{
+ /* 1 lxvwsx, 1 xxlor, 1 stxvp. */
+ *p = __builtin_vpair_f32_splat (*q);
+}
+
+void
+test_f32_assemble (__vector_pair *p,
+ vector float v1,
+ vector float v2)
+{
+ /* 2 xxlor, 1 stxvp. */
+ *p = __builtin_vpair_f32_assemble (v1, v2);
+}
+
+vector float
+test_f32_extract_0_reg (__vector_pair *p)
+{
+ /* 1 lxvp, 1 xxlor. */
+ __vector_pair vp = *p;
+ __asm__ (" # extract in register %x0" : "+wa" (vp));
+ return __builtin_vpair_f32_extract_vector (vp, 0);
+}
+
+vector float
+test_f32_extract_1_reg (__vector_pair *p)
+{
+ /* 1 lxvp, 1 xxlor. */
+ __vector_pair vp = *p;
+ __asm__ (" # extract in register %x0" : "+wa" (vp));
+  return __builtin_vpair_f32_extract_vector (vp, 1);
+}
+
+vector float
+test_f32_extract_0_mem (__vector_pair *p)
+{
+ /* 1 lxv. */
+ return __builtin_vpair_f32_extract_vector (p[1], 0);
+}
+
+vector float
+test_f32_extract_1_mem (__vector_pair *p)
+{
+ /* 1 lxv. */
+ return __builtin_vpair_f32_extract_vector (p[2], 1);
+}
+
+/* { dg-final { scan-assembler-times {\mlxv\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mlxvp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mlxvwsx\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 5 } } */
+/* { dg-final { scan-assembler-times {\mxscvdpspn\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxspltiw\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxxspltw\M} 1 } } */
new file mode 100644
@@ -0,0 +1,84 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test the vector pair built-in functions for creation and extraction of
+ vector pair operations using 64-bit doubles. */
+
+void
+test_f64_splat_0 (__vector_pair *p)
+{
+ /* 2 xxspltib. */
+ *p = __builtin_vpair_f64_splat (0.0);
+}
+
+void
+test_f64_splat_1 (__vector_pair *p)
+{
+ /* 1 xxspltidp, 1 xxlor. */
+ *p = __builtin_vpair_f64_splat (1.0);
+}
+
+void
+test_f64_splat_var (__vector_pair *p,
+ double d)
+{
+ /* 1 xxpermdi, 1 xxlor. */
+ *p = __builtin_vpair_f64_splat (d);
+}
+
+void
+test_f64_splat_mem (__vector_pair *p,
+ double *q)
+{
+ /* 1 lxvdsx, 1 xxlor. */
+ *p = __builtin_vpair_f64_splat (*q);
+}
+
+void
+test_f64_assemble (__vector_pair *p,
+ vector double v1,
+ vector double v2)
+{
+ /* 2 xxlor, 1 stxvp. */
+ *p = __builtin_vpair_f64_assemble (v1, v2);
+}
+
+vector double
+test_f64_extract_0_reg (__vector_pair *p)
+{
+ /* 1 lxvp, 1 xxlor. */
+ __vector_pair vp = *p;
+ __asm__ (" # extract in register %x0" : "+wa" (vp));
+ return __builtin_vpair_f64_extract_vector (vp, 0);
+}
+
+vector double
+test_f64_extract_1_reg (__vector_pair *p)
+{
+ /* 1 lxvp, 1 xxlor. */
+ __vector_pair vp = *p;
+ __asm__ (" # extract in register %x0" : "+wa" (vp));
+  return __builtin_vpair_f64_extract_vector (vp, 1);
+}
+
+vector double
+test_f64_extract_0_mem (__vector_pair *p)
+{
+ /* 1 lxv. */
+ return __builtin_vpair_f64_extract_vector (p[1], 0);
+}
+
+vector double
+test_f64_extract_1_mem (__vector_pair *p)
+{
+ /* 1 lxv. */
+ return __builtin_vpair_f64_extract_vector (p[2], 1);
+}
+
+/* { dg-final { scan-assembler-times {\mlxvdsx\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mlxvp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 5 } } */
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M} 1 } } */
new file mode 100644
@@ -0,0 +1,156 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test the vector pair built-in functions for creation and extraction of
+ vector pair operations using 64-bit integers. */
+
+void
+test_i64_splat_0 (__vector_pair *p)
+{
+ /* 2 xxspltib, 1 stxvp. */
+ *p = __builtin_vpair_i64_splat (0);
+}
+
+void
+test_i64_splat_1 (__vector_pair *p)
+{
+ /* 1 xxspltib, 1 vextsb2d, 1 xxlor, 1 stxvp. */
+ *p = __builtin_vpair_i64_splat (1);
+}
+
+void
+test_i64_splat_var (__vector_pair *p,
+ long long ll)
+{
+  /* 1 mtvsrdd, 1 xxlor, 1 stxvp.  */
+ *p = __builtin_vpair_i64_splat (ll);
+}
+
+void
+test_i64_splat_mem (__vector_pair *p,
+ long long *q)
+{
+  /* 1 lxvdsx, 1 xxlor, 1 stxvp.  */
+ *p = __builtin_vpair_i64_splat (*q);
+}
+
+void
+test_i64_assemble (__vector_pair *p,
+ vector long long v1,
+ vector long long v2)
+{
+ /* 2 xxlor, 1 stxvp. */
+ *p = __builtin_vpair_i64_assemble (v1, v2);
+}
+
+vector long long
+test_i64_extract_0_reg (__vector_pair *p)
+{
+ /* 1 lxvp, 1 xxlor. */
+ __vector_pair vp = *p;
+ __asm__ (" # extract in register %x0" : "+wa" (vp));
+ return __builtin_vpair_i64_extract_vector (vp, 0);
+}
+
+vector long long
+test_i64_extract_1_reg (__vector_pair *p)
+{
+ /* 1 lxvp, 1 xxlor. */
+ __vector_pair vp = *p;
+ __asm__ (" # extract in register %x0" : "+wa" (vp));
+  return __builtin_vpair_i64_extract_vector (vp, 1);
+}
+
+vector long long
+test_i64_extract_0_mem (__vector_pair *p)
+{
+ /* 1 lxv. */
+ return __builtin_vpair_i64_extract_vector (p[1], 0);
+}
+
+vector long long
+test_i64_extract_1_mem (__vector_pair *p)
+{
+ /* 1 lxv. */
+ return __builtin_vpair_i64_extract_vector (p[2], 1);
+}
+
+void
+test_i64u_splat_0 (__vector_pair *p)
+{
+ /* 2 xxspltib, 1 stxvp. */
+ *p = __builtin_vpair_i64u_splat (0);
+}
+
+void
+test_i64u_splat_1 (__vector_pair *p)
+{
+ /* 1 xxspltib, 1 vextsb2d, 1 xxlor, 1 stxvp. */
+ *p = __builtin_vpair_i64u_splat (1);
+}
+
+void
+test_i64u_splat_var (__vector_pair *p,
+ unsigned long long ull)
+{
+  /* 1 mtvsrdd, 1 xxlor, 1 stxvp.  */
+ *p = __builtin_vpair_i64u_splat (ull);
+}
+
+void
+test_i64u_splat_mem (__vector_pair *p,
+ unsigned long long *q)
+{
+  /* 1 lxvdsx, 1 xxlor, 1 stxvp.  */
+ *p = __builtin_vpair_i64u_splat (*q);
+}
+
+void
+test_i64u_assemble (__vector_pair *p,
+ vector unsigned long long v1,
+ vector unsigned long long v2)
+{
+ /* 2 xxlor, 1 stxvp. */
+ *p = __builtin_vpair_i64u_assemble (v1, v2);
+}
+
+vector unsigned long long
+test_i64u_extract_0_reg (__vector_pair *p)
+{
+ /* 1 lxvp, 1 xxlor. */
+ __vector_pair vp = *p;
+ __asm__ (" # extract in register %x0" : "+wa" (vp));
+ return __builtin_vpair_i64u_extract_vector (vp, 0);
+}
+
+vector unsigned long long
+test_i64u_extract_1_reg (__vector_pair *p)
+{
+ /* 1 lxvp, 1 xxlor. */
+ __vector_pair vp = *p;
+ __asm__ (" # extract in register %x0" : "+wa" (vp));
+  return __builtin_vpair_i64u_extract_vector (vp, 1);
+}
+
+vector unsigned long long
+test_i64u_extract_0_mem (__vector_pair *p)
+{
+ /* 1 lxv. */
+ return __builtin_vpair_i64u_extract_vector (p[1], 0);
+}
+
+vector unsigned long long
+test_i64u_extract_1_mem (__vector_pair *p)
+{
+ /* 1 lxv. */
+ return __builtin_vpair_i64u_extract_vector (p[2], 1);
+}
+
+/* { dg-final { scan-assembler-times {\mlxv\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mlxvdsx\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mlxvp\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mmtvsrdd\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 10 } } */
+/* { dg-final { scan-assembler-times {\mvextsb2d\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M} 6 } } */
new file mode 100644
@@ -0,0 +1,139 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test the vector pair built-in functions for creation and extraction of
+ vector pair operations using 32-bit integers. */
+
+void
+test_i32_splat_0 (__vector_pair *p)
+{
+ /* 2 xxspltib, 1 stxvp. */
+ *p = __builtin_vpair_i32_splat (0);
+}
+
+void
+test_i32_splat_1 (__vector_pair *p)
+{
+ /* 1 vspltisw, 1 xxlor, 1 stxvp. */
+ *p = __builtin_vpair_i32_splat (1);
+}
+
+void
+test_i32_splat_mem (__vector_pair *p,
+ int *q)
+{
+ /* 1 lxvwsx, 1 xxlor, 1 stxvp. */
+ *p = __builtin_vpair_i32_splat (*q);
+}
+
+void
+test_i32_assemble (__vector_pair *p,
+ vector int v1,
+ vector int v2)
+{
+ /* 2 xxlor, 1 stxvp. */
+ *p = __builtin_vpair_i32_assemble (v1, v2);
+}
+
+vector int
+test_i32_extract_0_reg (__vector_pair *p)
+{
+ /* 1 lxvp, 1 xxlor. */
+ __vector_pair vp = *p;
+ __asm__ (" # extract in register %x0" : "+wa" (vp));
+ return __builtin_vpair_i32_extract_vector (vp, 0);
+}
+
+vector int
+test_i32_extract_1_reg (__vector_pair *p)
+{
+ /* 1 lxvp, 1 xxlor. */
+ __vector_pair vp = *p;
+ __asm__ (" # extract in register %x0" : "+wa" (vp));
+  return __builtin_vpair_i32_extract_vector (vp, 1);
+}
+
+vector int
+test_i32_extract_0_mem (__vector_pair *p)
+{
+ /* 1 lxv. */
+ return __builtin_vpair_i32_extract_vector (p[1], 0);
+}
+
+vector int
+test_i32_extract_1_mem (__vector_pair *p)
+{
+ /* 1 lxv. */
+ return __builtin_vpair_i32_extract_vector (p[2], 1);
+}
+
+void
+test_i32u_splat_0 (__vector_pair *p)
+{
+ /* 2 xxspltib, 1 stxvp. */
+ *p = __builtin_vpair_i32u_splat (0);
+}
+
+void
+test_i32u_splat_1 (__vector_pair *p)
+{
+ /* 1 vspltisw, 1 xxlor, 1 stxvp. */
+ *p = __builtin_vpair_i32u_splat (1);
+}
+
+void
+test_i32u_splat_mem (__vector_pair *p,
+ unsigned int *q)
+{
+ /* 1 lxvwsx, 1 xxlor, 1 stxvp. */
+ *p = __builtin_vpair_i32u_splat (*q);
+}
+
+void
+test_i32u_assemble (__vector_pair *p,
+ vector unsigned int v1,
+ vector unsigned int v2)
+{
+ /* 2 xxlor, 1 stxvp. */
+ *p = __builtin_vpair_i32u_assemble (v1, v2);
+}
+
+vector unsigned int
+test_i32u_extract_0_reg (__vector_pair *p)
+{
+ /* 1 lxvp, 1 xxlor. */
+ __vector_pair vp = *p;
+ __asm__ (" # extract in register %x0" : "+wa" (vp));
+ return __builtin_vpair_i32u_extract_vector (vp, 0);
+}
+
+vector unsigned int
+test_i32u_extract_1_reg (__vector_pair *p)
+{
+ /* 1 lxvp, 1 xxlor. */
+ __vector_pair vp = *p;
+ __asm__ (" # extract in register %x0" : "+wa" (vp));
+  return __builtin_vpair_i32u_extract_vector (vp, 1);
+}
+
+vector unsigned int
+test_i32u_extract_0_mem (__vector_pair *p)
+{
+ /* 1 lxv. */
+ return __builtin_vpair_i32u_extract_vector (p[1], 0);
+}
+
+vector unsigned int
+test_i32u_extract_1_mem (__vector_pair *p)
+{
+ /* 1 lxv. */
+ return __builtin_vpair_i32u_extract_vector (p[2], 1);
+}
+
+/* { dg-final { scan-assembler-times {\mlxv\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mlxvp\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mlxvwsx\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 8 } } */
+/* { dg-final { scan-assembler-times {\mvspltisw\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M} 4 } } */
new file mode 100644
@@ -0,0 +1,141 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test the vector pair built-in functions for creation and extraction of
+ vector pair operations using 16-bit integers. */
+
+void
+test_i16_splat_0 (__vector_pair *p)
+{
+  /* Splat of 0: expect 2 xxspltib, 1 stxvp. */
+  *p = __builtin_vpair_i16_splat (0);
+}
+
+void
+test_i16_splat_1 (__vector_pair *p)
+{
+  /* Splat of 1: expect 1 vspltish, 1 xxlor, 1 stxvp. */
+  *p = __builtin_vpair_i16_splat (1);
+}
+
+void
+test_i16_splat_mem (__vector_pair *p,
+		    short *q)
+{
+  /* Splat from memory: expect 1 lxsihzx, 1 vsplth, 1 xxlor, 1 stxvp. */
+  *p = __builtin_vpair_i16_splat (*q);
+}
+
+void
+test_i16_assemble (__vector_pair *p,
+		   vector short v1,
+		   vector short v2)
+{
+  /* Assemble two vectors into a pair: expect 2 xxlor, 1 stxvp. */
+  *p = __builtin_vpair_i16_assemble (v1, v2);
+}
+
+vector short
+test_i16_extract_0_reg (__vector_pair *p)
+{
+  /* Force the pair into a register; extract vector 0: 1 lxvp, 1 xxlor. */
+  __vector_pair vp = *p;
+  __asm__ (" # extract in register %x0" : "+wa" (vp));
+  return __builtin_vpair_i16_extract_vector (vp, 0);
+}
+
+vector short
+test_i16_extract_1_reg (__vector_pair *p)
+{
+  /* Extract vector 1 while the pair is in a register: 1 lxvp, 1 xxlor.
+     (Was a copy-paste of the index-0 test; index 1 is what this tests.)  */
+  __vector_pair vp = *p;
+  __asm__ (" # extract in register %x0" : "+wa" (vp));
+  return __builtin_vpair_i16_extract_vector (vp, 1);
+}
+
+vector short
+test_i16_extract_0_mem (__vector_pair *p)
+{
+  /* Extract of vector 0 directly from memory: expect a single lxv. */
+  return __builtin_vpair_i16_extract_vector (p[1], 0);
+}
+
+vector short
+test_i16_extract_1_mem (__vector_pair *p)
+{
+  /* Extract of vector 1 directly from memory: expect a single lxv. */
+  return __builtin_vpair_i16_extract_vector (p[2], 1);
+}
+
+void
+test_i16u_splat_0 (__vector_pair *p)
+{
+  /* Splat of 0: expect 2 xxspltib, 1 stxvp. */
+  *p = __builtin_vpair_i16u_splat (0);
+}
+
+void
+test_i16u_splat_1 (__vector_pair *p)
+{
+  /* Splat of 1: expect 1 vspltish, 1 xxlor, 1 stxvp. */
+  *p = __builtin_vpair_i16u_splat (1);
+}
+
+void
+test_i16u_splat_mem (__vector_pair *p,
+		     unsigned short *q)
+{
+  /* Splat from memory: expect 1 lxsihzx, 1 vsplth, 1 xxlor, 1 stxvp. */
+  *p = __builtin_vpair_i16u_splat (*q);
+}
+
+void
+test_i16u_assemble (__vector_pair *p,
+		    vector unsigned short v1,
+		    vector unsigned short v2)
+{
+  /* Assemble two vectors into a pair: expect 2 xxlor, 1 stxvp. */
+  *p = __builtin_vpair_i16u_assemble (v1, v2);
+}
+
+vector unsigned short
+test_i16u_extract_0_reg (__vector_pair *p)
+{
+  /* Force the pair into a register; extract vector 0: 1 lxvp, 1 xxlor. */
+  __vector_pair vp = *p;
+  __asm__ (" # extract in register %x0" : "+wa" (vp));
+  return __builtin_vpair_i16u_extract_vector (vp, 0);
+}
+
+vector unsigned short
+test_i16u_extract_1_reg (__vector_pair *p)
+{
+  /* Extract vector 1 while the pair is in a register: 1 lxvp, 1 xxlor.
+     (Was a copy-paste of the index-0 test; index 1 is what this tests.)  */
+  __vector_pair vp = *p;
+  __asm__ (" # extract in register %x0" : "+wa" (vp));
+  return __builtin_vpair_i16u_extract_vector (vp, 1);
+}
+
+vector unsigned short
+test_i16u_extract_0_mem (__vector_pair *p)
+{
+  /* Extract of vector 0 directly from memory: expect a single lxv. */
+  return __builtin_vpair_i16u_extract_vector (p[1], 0);
+}
+
+vector unsigned short
+test_i16u_extract_1_mem (__vector_pair *p)
+{
+  /* Extract of vector 1 directly from memory: expect a single lxv. */
+  return __builtin_vpair_i16u_extract_vector (p[2], 1);
+}
+
+/* { dg-final { scan-assembler-times {\mlxsihzx\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mlxv\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mlxvp\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 8 } } */
+/* { dg-final { scan-assembler-times {\mvsplth\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mvspltish\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxlor\M} 12 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M} 4 } } */
new file mode 100644
@@ -0,0 +1,139 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test the vector pair built-in functions for creation and extraction of
+ vector pair operations using 8-bit integers. */
+
+void
+test_i8_splat_0 (__vector_pair *p)
+{
+  /* Splat of 0: expect 2 xxspltib, 1 stxvp. */
+  *p = __builtin_vpair_i8_splat (0);
+}
+
+void
+test_i8_splat_1 (__vector_pair *p)
+{
+  /* 1 xxspltib, 1 xxlor, 1 stxvp (xxspltib splats a byte immediate). */
+  *p = __builtin_vpair_i8_splat (1);
+}
+
+void
+test_i8_splat_mem (__vector_pair *p,
+		   signed char *q)
+{
+  /* Splat from memory: expect 1 lxsibzx, 1 vspltb, 1 xxlor, 1 stxvp. */
+  *p = __builtin_vpair_i8_splat (*q);
+}
+
+void
+test_i8_assemble (__vector_pair *p,
+		  vector signed char v1,
+		  vector signed char v2)
+{
+  /* Assemble two vectors into a pair: expect 2 xxlor, 1 stxvp. */
+  *p = __builtin_vpair_i8_assemble (v1, v2);
+}
+
+vector signed char
+test_i8_extract_0_reg (__vector_pair *p)
+{
+  /* Force the pair into a register; extract vector 0: 1 lxvp, 1 xxlor. */
+  __vector_pair vp = *p;
+  __asm__ (" # extract in register %x0" : "+wa" (vp));
+  return __builtin_vpair_i8_extract_vector (vp, 0);
+}
+
+vector signed char
+test_i8_extract_1_reg (__vector_pair *p)
+{
+  /* Extract vector 1 while the pair is in a register: 1 lxvp, 1 xxlor.
+     (Was a copy-paste of the index-0 test; index 1 is what this tests.)  */
+  __vector_pair vp = *p;
+  __asm__ (" # extract in register %x0" : "+wa" (vp));
+  return __builtin_vpair_i8_extract_vector (vp, 1);
+}
+
+vector signed char
+test_i8_extract_0_mem (__vector_pair *p)
+{
+  /* Extract of vector 0 directly from memory: expect a single lxv. */
+  return __builtin_vpair_i8_extract_vector (p[1], 0);
+}
+
+vector signed char
+test_i8_extract_1_mem (__vector_pair *p)
+{
+  /* Extract of vector 1 directly from memory: expect a single lxv. */
+  return __builtin_vpair_i8_extract_vector (p[2], 1);
+}
+
+void
+test_i8u_splat_0 (__vector_pair *p)
+{
+  /* Splat of 0: expect 2 xxspltib, 1 stxvp. */
+  *p = __builtin_vpair_i8u_splat (0);
+}
+
+void
+test_i8u_splat_1 (__vector_pair *p)
+{
+  /* 1 xxspltib, 1 xxlor, 1 stxvp (xxspltib splats a byte immediate). */
+  *p = __builtin_vpair_i8u_splat (1);
+}
+
+void
+test_i8u_splat_mem (__vector_pair *p,
+		    unsigned char *q)
+{
+  /* Splat from memory: expect 1 lxsibzx, 1 vspltb, 1 xxlor, 1 stxvp. */
+  *p = __builtin_vpair_i8u_splat (*q);
+}
+
+void
+test_i8u_assemble (__vector_pair *p,
+		   vector unsigned char v1,
+		   vector unsigned char v2)
+{
+  /* Assemble two vectors into a pair: expect 2 xxlor, 1 stxvp. */
+  *p = __builtin_vpair_i8u_assemble (v1, v2);
+}
+
+vector unsigned char
+test_i8u_extract_0_reg (__vector_pair *p)
+{
+  /* Force the pair into a register; extract vector 0: 1 lxvp, 1 xxlor. */
+  __vector_pair vp = *p;
+  __asm__ (" # extract in register %x0" : "+wa" (vp));
+  return __builtin_vpair_i8u_extract_vector (vp, 0);
+}
+
+vector unsigned char
+test_i8u_extract_1_reg (__vector_pair *p)
+{
+  /* Extract vector 1 while the pair is in a register: 1 lxvp, 1 xxlor.
+     (Was a copy-paste of the index-0 test; index 1 is what this tests.)  */
+  __vector_pair vp = *p;
+  __asm__ (" # extract in register %x0" : "+wa" (vp));
+  return __builtin_vpair_i8u_extract_vector (vp, 1);
+}
+
+vector unsigned char
+test_i8u_extract_0_mem (__vector_pair *p)
+{
+  /* Extract of vector 0 directly from memory: expect a single lxv. */
+  return __builtin_vpair_i8u_extract_vector (p[1], 0);
+}
+
+vector unsigned char
+test_i8u_extract_1_mem (__vector_pair *p)
+{
+  /* Extract of vector 1 directly from memory: expect a single lxv. */
+  return __builtin_vpair_i8u_extract_vector (p[2], 1);
+}
+
+/* { dg-final { scan-assembler-times {\mlxsibzx\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mlxv\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mlxvp\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 8 } } */
+/* { dg-final { scan-assembler-times {\mvspltb\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M} 6 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+void
+test_zero (__vector_pair *p)
+{
+  /* 2 xxspltib, 1 stxvp (per the scans below). */
+  *p = __builtin_vpair_zero ();
+}
+
+/* { dg-final { scan-assembler-times {\mstxvp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M} 2 } } */