@@ -241,8 +241,6 @@
UNSPEC_VEC_VFMIN
UNSPEC_VEC_VFMAX
- UNSPEC_VEC_ELTSWAP
-
UNSPEC_NNPA_VCLFNHS_V8HI
UNSPEC_NNPA_VCLFNLS_V8HI
UNSPEC_NNPA_VCRNFS_V8HI
@@ -948,6 +948,152 @@
operands[5] = simplify_gen_subreg (DFmode, operands[1], TFmode, 8);
})
+;; VECTOR REVERSE ELEMENTS V16QI
+; Expander: creates the index list 15..0 as selector and as vector constant.
+(define_expand "eltswapv16qi"
+ [(parallel
+ [(set (match_operand:V16QI 0 "nonimmediate_operand")
+ (vec_select:V16QI
+ (match_operand:V16QI 1 "nonimmediate_operand")
+ (match_dup 2))) ; operands[2], the selector PARALLEL set up below
+ (use (match_dup 3))])] ; operands[3], the same indices as a V16QI constant
+ "TARGET_VX"
+{
+ rtvec vec = rtvec_alloc (16);
+ for (int i = 0; i < 16; ++i)
+ RTVEC_ELT (vec, i) = GEN_INT (15 - i); /* 15, 14, ..., 0 */
+ operands[2] = gen_rtx_PARALLEL (VOIDmode, vec);
+ operands[3] = gen_rtx_CONST_VECTOR (V16QImode, vec); /* shares VEC with operands[2] */
+})
+; Alt 0 (reg-reg) splits after reload into VEC_PERM; alts 1/2 emit vstbrq/vlbrq.
+(define_insn_and_split "*eltswapv16qi"
+ [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,^R,^v")
+ (vec_select:V16QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "v,^v,^R")
+ (parallel [(const_int 15)
+ (const_int 14)
+ (const_int 13)
+ (const_int 12)
+ (const_int 11)
+ (const_int 10)
+ (const_int 9)
+ (const_int 8)
+ (const_int 7)
+ (const_int 6)
+ (const_int 5)
+ (const_int 4)
+ (const_int 3)
+ (const_int 2)
+ (const_int 1)
+ (const_int 0)])))
+ (use (match_operand:V16QI 2 "permute_pattern_operand" "v,X,X"))] ; register required for alt 0 only
+ "TARGET_VX"
+ "@
+ #
+ vstbrq\t%v1,%0
+ vlbrq\t%v0,%1"
+ "&& reload_completed && REG_P (operands[0]) && REG_P (operands[1])"
+ [(set (match_dup 0)
+ (unspec:V16QI [(match_dup 1) ; operand 1 is passed as both permute inputs
+ (match_dup 1)
+ (match_dup 2)] ; operand 2 is the permute pattern
+ UNSPEC_VEC_PERM))]
+ ""
+ [(set_attr "cpu_facility" "*,vxe2,vxe2")
+ (set_attr "op_type" "*,VRX,VRX")])
+
+;; VECTOR REVERSE ELEMENTS V8HI
+; Alt 0 (reg-reg) is split after reload; alts 1/2 emit vsterh/vlerh (vxe2).
+(define_insn_and_split "eltswapv8hi"
+ [(set (match_operand:V8HI 0 "nonimmediate_operand" "=v,R,v")
+ (vec_select:V8HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "v,v,R")
+ (parallel [(const_int 7)
+ (const_int 6)
+ (const_int 5)
+ (const_int 4)
+ (const_int 3)
+ (const_int 2)
+ (const_int 1)
+ (const_int 0)])))
+ (clobber (match_scratch:V2DI 2 "=&v,X,X")) ; scratch register for alt 0 only
+ (clobber (match_scratch:V4SI 3 "=&v,X,X"))] ; scratch register for alt 0 only
+ "TARGET_VX"
+ "@
+ #
+ vsterh\t%v1,%0
+ vlerh\t%v0,%1"
+ "&& reload_completed && REG_P (operands[0]) && REG_P (operands[1])"
+ [(set (match_dup 2) ; view the input as V2DI
+ (subreg:V2DI (match_dup 1) 0))
+ (set (match_dup 2) ; swap the two doublewords
+ (vec_select:V2DI
+ (match_dup 2)
+ (parallel [(const_int 1) (const_int 0)])))
+ (set (match_dup 2) ; rotate each doubleword by 32 bits
+ (rotate:V2DI
+ (match_dup 2)
+ (const_int 32)))
+ (set (match_dup 3) ; view the intermediate result as V4SI
+ (subreg:V4SI (match_dup 2) 0))
+ (set (match_dup 3) ; rotate each word by 16 bits
+ (rotate:V4SI
+ (match_dup 3)
+ (const_int 16)))
+ (set (match_dup 0) ; view the final result as V8HI
+ (subreg:V8HI (match_dup 3) 0))]
+ ""
+ [(set_attr "cpu_facility" "*,vxe2,vxe2")
+ (set_attr "op_type" "*,VRX,VRX")])
+
+;; VECTOR REVERSE ELEMENTS V4SI / V4SF
+; Alt 0 (reg-reg) is split after reload; alts 1/2 emit vsterf/vlerf (vxe2).
+(define_insn_and_split "eltswap<mode>"
+ [(set (match_operand:V_HW_4 0 "nonimmediate_operand" "=v,R,v")
+ (vec_select:V_HW_4
+ (match_operand:V_HW_4 1 "nonimmediate_operand" "v,v,R")
+ (parallel [(const_int 3)
+ (const_int 2)
+ (const_int 1)
+ (const_int 0)])))
+ (clobber (match_scratch:V2DI 2 "=&v,X,X"))] ; scratch register for alt 0 only
+ "TARGET_VX"
+ "@
+ #
+ vsterf\t%v1,%0
+ vlerf\t%v0,%1"
+ "&& reload_completed && REG_P (operands[0]) && REG_P (operands[1])"
+ [(set (match_dup 2) ; view the input as V2DI
+ (subreg:V2DI (match_dup 1) 0))
+ (set (match_dup 2) ; swap the two doublewords
+ (vec_select:V2DI
+ (match_dup 2)
+ (parallel [(const_int 1) (const_int 0)])))
+ (set (match_dup 2) ; rotate each doubleword by 32 bits
+ (rotate:V2DI
+ (match_dup 2)
+ (const_int 32)))
+ (set (match_dup 0) ; view the result in the original mode
+ (subreg:V_HW_4 (match_dup 2) 0))]
+ ""
+ [(set_attr "cpu_facility" "*,vxe2,vxe2")
+ (set_attr "op_type" "*,VRX,VRX")])
+
+;; VECTOR REVERSE ELEMENTS V2DI / V2DF
+; Alt 0 (reg-reg) emits vpdi directly; alts 1/2 emit vsterg/vlerg (vxe2).
+(define_insn "eltswap<mode>"
+ [(set (match_operand:V_HW_2 0 "nonimmediate_operand" "=v,R,v")
+ (vec_select:V_HW_2
+ (match_operand:V_HW_2 1 "nonimmediate_operand" "v,v,R")
+ (parallel [(const_int 1) ; element 1 first, then element 0
+ (const_int 0)])))]
+ "TARGET_VX"
+ "@
+ vpdi\t%v0,%v1,%v1,4
+ vsterg\t%v1,%0
+ vlerg\t%v0,%1"
+ [(set_attr "cpu_facility" "vx,vxe2,vxe2")
+ (set_attr "op_type" "VRR,VRX,VRX")])
;;
;; Vector integer arithmetic instructions
@@ -2163,149 +2163,6 @@
"<vw>fmax<sdx>b\t%v0,%v1,%v2,%b3"
[(set_attr "op_type" "VRR")])
-; The element reversal builtins introduced with z15 have been made
-; available also for older CPUs down to z13.
-(define_expand "eltswap<mode>"
- [(set (match_operand:VEC_HW 0 "nonimmediate_operand" "")
- (unspec:VEC_HW [(match_operand:VEC_HW 1 "nonimmediate_operand" "")]
- UNSPEC_VEC_ELTSWAP))]
- "TARGET_VX")
-
-; The byte element reversal is implemented as 128 bit byte swap.
-; Alternatively this could be emitted as bswap:V1TI but the required
-; subregs appear to confuse combine.
-(define_insn "*eltswapv16qi"
- [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,v,R")
- (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "v,R,v")]
- UNSPEC_VEC_ELTSWAP))]
- "TARGET_VXE2"
- "@
- #
- vlbrq\t%v0,%v1
- vstbrq\t%v1,%v0"
- [(set_attr "op_type" "*,VRX,VRX")])
-
-; vlerh, vlerf, vlerg, vsterh, vsterf, vsterg
-(define_insn "*eltswap<mode>"
- [(set (match_operand:V_HW_HSD 0 "nonimmediate_operand" "=v,v,R")
- (unspec:V_HW_HSD [(match_operand:V_HW_HSD 1 "nonimmediate_operand" "v,R,v")]
- UNSPEC_VEC_ELTSWAP))]
- "TARGET_VXE2"
- "@
- #
- vler<bhfgq>\t%v0,%v1
- vster<bhfgq>\t%v1,%v0"
- [(set_attr "op_type" "*,VRX,VRX")])
-
-; The emulation pattern below will also accept
-; vst (eltswap (vl))
-; i.e. both operands in memory, which reload needs to fix.
-; Split into
-; vl
-; vster (=vst (eltswap))
-; since we prefer vster over vler as long as the latter
-; does not support alignment hints.
-(define_split
- [(set (match_operand:VEC_HW 0 "memory_operand" "")
- (unspec:VEC_HW [(match_operand:VEC_HW 1 "memory_operand" "")]
- UNSPEC_VEC_ELTSWAP))]
- "TARGET_VXE2 && can_create_pseudo_p ()"
- [(set (match_dup 2) (match_dup 1))
- (set (match_dup 0)
- (unspec:VEC_HW [(match_dup 2)] UNSPEC_VEC_ELTSWAP))]
-{
- operands[2] = gen_reg_rtx (<MODE>mode);
-})
-
-
-; Swapping v2df/v2di can be done via vpdi on z13 and z14.
-(define_split
- [(set (match_operand:V_HW_2 0 "register_operand" "")
- (unspec:V_HW_2 [(match_operand:V_HW_2 1 "register_operand" "")]
- UNSPEC_VEC_ELTSWAP))]
- "TARGET_VX && can_create_pseudo_p ()"
- [(set (match_operand:V_HW_2 0 "register_operand" "=v")
- (vec_select:V_HW_2
- (vec_concat:<vec_2x_nelts>
- (match_operand:V_HW_2 1 "register_operand" "v")
- (match_dup 1))
- (parallel [(const_int 1) (const_int 2)])))]
-)
-
-
-; Swapping v4df/v4si can be done via vpdi and rot.
-(define_split
- [(set (match_operand:V_HW_4 0 "register_operand" "")
- (unspec:V_HW_4 [(match_operand:V_HW_4 1 "register_operand" "")]
- UNSPEC_VEC_ELTSWAP))]
- "TARGET_VX && can_create_pseudo_p ()"
- [(set (match_dup 2)
- (vec_select:V_HW_4
- (vec_concat:<vec_2x_nelts>
- (match_dup 1)
- (match_dup 1))
- (parallel [(const_int 2) (const_int 3) (const_int 4) (const_int 5)])))
- (set (match_dup 3)
- (subreg:V2DI (match_dup 2) 0))
- (set (match_dup 4)
- (rotate:V2DI
- (match_dup 3)
- (const_int 32)))
- (set (match_operand:V_HW_4 0)
- (subreg:V_HW_4 (match_dup 4) 0))]
-{
- operands[2] = gen_reg_rtx (<MODE>mode);
- operands[3] = gen_reg_rtx (V2DImode);
- operands[4] = gen_reg_rtx (V2DImode);
-})
-
-; z15 has instructions for doing element reversal from mem to reg
-; or the other way around. For reg to reg or on pre z15 machines
-; we have to emulate it with vector permute.
-(define_insn_and_split "*eltswap<mode>_emu"
- [(set (match_operand:VEC_HW 0 "nonimmediate_operand" "=vR")
- (unspec:VEC_HW [(match_operand:VEC_HW 1 "nonimmediate_operand" "vR")]
- UNSPEC_VEC_ELTSWAP))]
- "TARGET_VX && can_create_pseudo_p ()"
- "#"
- "&& ((!memory_operand (operands[0], <MODE>mode)
- && !memory_operand (operands[1], <MODE>mode))
- || !TARGET_VXE2)"
- [(set (match_dup 3)
- (unspec:V16QI [(match_dup 4)
- (match_dup 4)
- (match_dup 2)]
- UNSPEC_VEC_PERM))
- (set (match_dup 0) (subreg:VEC_HW (match_dup 3) 0))]
-{
- static char p[4][16] =
- { { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }, /* Q */
- { 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1 }, /* H */
- { 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 }, /* S */
- { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 } }; /* D */
- char *perm;
- rtx perm_rtx[16], constv;
-
- switch (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)))
- {
- case 1: perm = p[0]; break;
- case 2: perm = p[1]; break;
- case 4: perm = p[2]; break;
- case 8: perm = p[3]; break;
- default: gcc_unreachable ();
- }
-
- for (int i = 0; i < 16; i++)
- perm_rtx[i] = GEN_INT (perm[i]);
-
- operands[1] = force_reg (<MODE>mode, operands[1]);
- operands[2] = gen_reg_rtx (V16QImode);
- operands[3] = gen_reg_rtx (V16QImode);
- operands[4] = simplify_gen_subreg (V16QImode, operands[1], <MODE>mode, 0);
- constv = force_const_mem (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm_rtx)));
- emit_move_insn (operands[2], constv);
-})
-
; vec_insert (__builtin_bswap32 (*a), b, 1) set-element-bswap-2.c
; b[1] = __builtin_bswap32 (*a) set-element-bswap-3.c
; vlebrh, vlebrf, vlebrg
new file mode 100644
@@ -0,0 +1,46 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+/* { dg-require-effective-target s390_vx } */
+/* { dg-final { scan-assembler-times {\tvpdi\t} 4 } } */
+/* { dg-final { scan-assembler-not {\tvperm\t} } } */
+/* Reversing vectors passed by value: four vpdi expected in total, no vperm.  */
+typedef short __attribute__ ((vector_size (16))) V8HI;
+typedef int __attribute__ ((vector_size (16))) V4SI;
+typedef long long __attribute__ ((vector_size (16))) V2DI;
+typedef double __attribute__ ((vector_size (16))) V2DF;
+/* Return X with its eight short elements in reverse order.  */
+V8HI
+v8hi (V8HI x)
+{
+ V8HI y;
+ for (int i = 0; i < 8; ++i)
+ y[i] = x[7 - i];
+ return y;
+}
+/* Return X with its four int elements in reverse order.  */
+V4SI
+v4si (V4SI x)
+{
+ V4SI y;
+ for (int i = 0; i < 4; ++i)
+ y[i] = x[3 - i];
+ return y;
+}
+/* Return X with its two long long elements swapped.  */
+V2DI
+v2di (V2DI x)
+{
+ V2DI y;
+ for (int i = 0; i < 2; ++i)
+ y[i] = x[1 - i];
+ return y;
+}
+/* Return X with its two double elements swapped.  */
+V2DF
+v2df (V2DF x)
+{
+ V2DF y;
+ for (int i = 0; i < 2; ++i)
+ y[i] = x[1 - i];
+ return y;
+}
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z14" } */
+/* { dg-require-effective-target s390_vxe } */
+/* { dg-final { scan-assembler-times {\tvpdi\t} 1 } } */
+/* { dg-final { scan-assembler-not {\tvperm\t} } } */
+/* Reversing a float vector passed by value: one vpdi expected, no vperm.  */
+typedef float __attribute__ ((vector_size (16))) V4SF;
+/* Return X with its four float elements in reverse order.  */
+V4SF
+v4sf (V4SF x)
+{
+ V4SF y;
+ for (int i = 0; i < 4; ++i)
+ y[i] = x[3 - i];
+ return y;
+}
new file mode 100644
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z14" } */
+/* { dg-require-effective-target s390_vxe } */
+/* { dg-final { scan-assembler-times {\tvpdi\t} 5 } } */
+/* { dg-final { scan-assembler-not {\tvperm\t} } } */
+/* Reversing vectors read through a pointer: five vpdi expected, no vperm.  */
+typedef short __attribute__ ((vector_size (16))) V8HI;
+typedef int __attribute__ ((vector_size (16))) V4SI;
+typedef long long __attribute__ ((vector_size (16))) V2DI;
+typedef float __attribute__ ((vector_size (16))) V4SF;
+typedef double __attribute__ ((vector_size (16))) V2DF;
+/* Return *X with its eight short elements in reverse order.  */
+V8HI
+v8hi (V8HI *x)
+{
+ V8HI y;
+ for (int i = 0; i < 8; ++i)
+ y[i] = (*x)[7 - i];
+ return y;
+}
+/* Return *X with its four int elements in reverse order.  */
+V4SI
+v4si (V4SI *x)
+{
+ V4SI y;
+ for (int i = 0; i < 4; ++i)
+ y[i] = (*x)[3 - i];
+ return y;
+}
+/* Return *X with its two long long elements swapped.  */
+V2DI
+v2di (V2DI *x)
+{
+ V2DI y;
+ for (int i = 0; i < 2; ++i)
+ y[i] = (*x)[1 - i];
+ return y;
+}
+/* Return *X with its four float elements in reverse order.  */
+V4SF
+v4sf (V4SF *x)
+{
+ V4SF y;
+ for (int i = 0; i < 4; ++i)
+ y[i] = (*x)[3 - i];
+ return y;
+}
+/* Return *X with its two double elements swapped.  */
+V2DF
+v2df (V2DF *x)
+{
+ V2DF y;
+ for (int i = 0; i < 2; ++i)
+ y[i] = (*x)[1 - i];
+ return y;
+}
new file mode 100644
@@ -0,0 +1,67 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z15" } */
+/* { dg-require-effective-target s390_vxe2 } */
+/* { dg-final { scan-assembler-times {\tvlbrq\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvler[hfg]\t} 5 } } */
+/* { dg-final { scan-assembler-not {\tvperm\t} } } */
+/* Reversing vectors read through a pointer: reversing loads expected.  */
+typedef signed char __attribute__ ((vector_size (16))) V16QI;
+typedef short __attribute__ ((vector_size (16))) V8HI;
+typedef int __attribute__ ((vector_size (16))) V4SI;
+typedef long long __attribute__ ((vector_size (16))) V2DI;
+typedef float __attribute__ ((vector_size (16))) V4SF;
+typedef double __attribute__ ((vector_size (16))) V2DF;
+/* Return *X with its sixteen signed char elements in reverse order.  */
+V16QI
+v16qi (V16QI *x)
+{
+ V16QI y;
+ for (int i = 0; i < 16; ++i)
+ y[i] = (*x)[15 - i];
+ return y;
+}
+/* Return *X with its eight short elements in reverse order.  */
+V8HI
+v8hi (V8HI *x)
+{
+ V8HI y;
+ for (int i = 0; i < 8; ++i)
+ y[i] = (*x)[7 - i];
+ return y;
+}
+/* Return *X with its four int elements in reverse order.  */
+V4SI
+v4si (V4SI *x)
+{
+ V4SI y;
+ for (int i = 0; i < 4; ++i)
+ y[i] = (*x)[3 - i];
+ return y;
+}
+/* Return *X with its two long long elements swapped.  */
+V2DI
+v2di (V2DI *x)
+{
+ V2DI y;
+ for (int i = 0; i < 2; ++i)
+ y[i] = (*x)[1 - i];
+ return y;
+}
+/* Return *X with its four float elements in reverse order.  */
+V4SF
+v4sf (V4SF *x)
+{
+ V4SF y;
+ for (int i = 0; i < 4; ++i)
+ y[i] = (*x)[3 - i];
+ return y;
+}
+/* Return *X with its two double elements swapped.  */
+V2DF
+v2df (V2DF *x)
+{
+ V2DF y;
+ for (int i = 0; i < 2; ++i)
+ y[i] = (*x)[1 - i];
+ return y;
+}
new file mode 100644
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z14" } */
+/* { dg-require-effective-target s390_vxe } */
+/* { dg-final { scan-assembler-times {\tvpdi\t} 5 } } */
+/* { dg-final { scan-assembler-not {\tvperm\t} } } */
+/* Storing reversed vectors through a pointer: five vpdi expected, no vperm.  */
+typedef short __attribute__ ((vector_size (16))) V8HI;
+typedef int __attribute__ ((vector_size (16))) V4SI;
+typedef long long __attribute__ ((vector_size (16))) V2DI;
+typedef float __attribute__ ((vector_size (16))) V4SF;
+typedef double __attribute__ ((vector_size (16))) V2DF;
+/* Store Y into *X with its eight short elements in reverse order.  */
+void
+v8hi (V8HI *x, V8HI y)
+{
+ V8HI z;
+ for (int i = 0; i < 8; ++i)
+ z[i] = y[7 - i];
+ *x = z;
+}
+/* Store Y into *X with its four int elements in reverse order.  */
+void
+v4si (V4SI *x, V4SI y)
+{
+ V4SI z;
+ for (int i = 0; i < 4; ++i)
+ z[i] = y[3 - i];
+ *x = z;
+}
+/* Store Y into *X with its two long long elements swapped.  */
+void
+v2di (V2DI *x, V2DI y)
+{
+ V2DI z;
+ for (int i = 0; i < 2; ++i)
+ z[i] = y[1 - i];
+ *x = z;
+}
+/* Store Y into *X with its four float elements in reverse order.  */
+void
+v4sf (V4SF *x, V4SF y)
+{
+ V4SF z;
+ for (int i = 0; i < 4; ++i)
+ z[i] = y[3 - i];
+ *x = z;
+}
+/* Store Y into *X with its two double elements swapped.  */
+void
+v2df (V2DF *x, V2DF y)
+{
+ V2DF z;
+ for (int i = 0; i < 2; ++i)
+ z[i] = y[1 - i];
+ *x = z;
+}
new file mode 100644
@@ -0,0 +1,67 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z15" } */
+/* { dg-require-effective-target s390_vxe2 } */
+/* { dg-final { scan-assembler-times {\tvstbrq\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvster[hfg]\t} 5 } } */
+/* { dg-final { scan-assembler-not {\tvperm\t} } } */
+/* Storing reversed vectors through a pointer: reversing stores expected.  */
+typedef signed char __attribute__ ((vector_size (16))) V16QI;
+typedef short __attribute__ ((vector_size (16))) V8HI;
+typedef int __attribute__ ((vector_size (16))) V4SI;
+typedef long long __attribute__ ((vector_size (16))) V2DI;
+typedef float __attribute__ ((vector_size (16))) V4SF;
+typedef double __attribute__ ((vector_size (16))) V2DF;
+/* Store Y into *X with its sixteen signed char elements in reverse order.  */
+void
+v16qi (V16QI *x, V16QI y)
+{
+ V16QI z;
+ for (int i = 0; i < 16; ++i)
+ z[i] = y[15 - i];
+ *x = z;
+}
+/* Store Y into *X with its eight short elements in reverse order.  */
+void
+v8hi (V8HI *x, V8HI y)
+{
+ V8HI z;
+ for (int i = 0; i < 8; ++i)
+ z[i] = y[7 - i];
+ *x = z;
+}
+/* Store Y into *X with its four int elements in reverse order.  */
+void
+v4si (V4SI *x, V4SI y)
+{
+ V4SI z;
+ for (int i = 0; i < 4; ++i)
+ z[i] = y[3 - i];
+ *x = z;
+}
+/* Store Y into *X with its two long long elements swapped.  */
+void
+v2di (V2DI *x, V2DI y)
+{
+ V2DI z;
+ for (int i = 0; i < 2; ++i)
+ z[i] = y[1 - i];
+ *x = z;
+}
+/* Store Y into *X with its four float elements in reverse order.  */
+void
+v4sf (V4SF *x, V4SF y)
+{
+ V4SF z;
+ for (int i = 0; i < 4; ++i)
+ z[i] = y[3 - i];
+ *x = z;
+}
+/* Store Y into *X with its two double elements swapped.  */
+void
+v2df (V2DF *x, V2DF y)
+{
+ V2DF z;
+ for (int i = 0; i < 2; ++i)
+ z[i] = y[1 - i];
+ *x = z;
+}
new file mode 100644
@@ -0,0 +1,67 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z15" } */
+/* { dg-require-effective-target s390_vxe2 } */
+/* { dg-final { scan-assembler-times {\tvstbrq\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvster[hfg]\t} 5 } } */
+/* { dg-final { scan-assembler-not {\tvperm\t} } } */
+/* Memory-to-memory reversal: reversing stores expected, no vperm.  */
+typedef signed char __attribute__ ((vector_size (16))) V16QI;
+typedef short __attribute__ ((vector_size (16))) V8HI;
+typedef int __attribute__ ((vector_size (16))) V4SI;
+typedef long long __attribute__ ((vector_size (16))) V2DI;
+typedef float __attribute__ ((vector_size (16))) V4SF;
+typedef double __attribute__ ((vector_size (16))) V2DF;
+/* Store *Y into *X with its sixteen signed char elements in reverse order.  */
+void
+v16qi (V16QI *x, V16QI *y)
+{
+ V16QI z;
+ for (int i = 0; i < 16; ++i)
+ z[i] = (*y)[15 - i];
+ *x = z;
+}
+/* Store *Y into *X with its eight short elements in reverse order.  */
+void
+v8hi (V8HI *x, V8HI *y)
+{
+ V8HI z;
+ for (int i = 0; i < 8; ++i)
+ z[i] = (*y)[7 - i];
+ *x = z;
+}
+/* Store *Y into *X with its four int elements in reverse order.  */
+void
+v4si (V4SI *x, V4SI *y)
+{
+ V4SI z;
+ for (int i = 0; i < 4; ++i)
+ z[i] = (*y)[3 - i];
+ *x = z;
+}
+/* Store *Y into *X with its two long long elements swapped.  */
+void
+v2di (V2DI *x, V2DI *y)
+{
+ V2DI z;
+ for (int i = 0; i < 2; ++i)
+ z[i] = (*y)[1 - i];
+ *x = z;
+}
+/* Store *Y into *X with its four float elements in reverse order.  */
+void
+v4sf (V4SF *x, V4SF *y)
+{
+ V4SF z;
+ for (int i = 0; i < 4; ++i)
+ z[i] = (*y)[3 - i];
+ *x = z;
+}
+/* Store *Y into *X with its two double elements swapped.  */
+void
+v2df (V2DF *x, V2DF *y)
+{
+ V2DF z;
+ for (int i = 0; i < 2; ++i)
+ z[i] = (*y)[1 - i];
+ *x = z;
+}
@@ -21,4 +21,6 @@ baz (signed short *x)
return vec_reve (vec_xl (0, x));
}
-/* { dg-final { scan-assembler-times "vperm\t" 3 } } */
+/* { dg-final { scan-assembler-times "vpdi\t" 3 } } */
+/* { dg-final { scan-assembler-times "verllg\t" 3 } } */
+/* { dg-final { scan-assembler-times "verllf\t" 3 } } */
@@ -9,7 +9,9 @@ foo (vector signed short x)
return vec_reve (x);
}
-/* { dg-final { scan-assembler-times "vperm\t" 1 } } */
+/* { dg-final { scan-assembler-times "vpdi\t" 1 } } */
+/* { dg-final { scan-assembler-times "verllg\t" 1 } } */
+/* { dg-final { scan-assembler-times "verllf\t" 1 } } */
vector signed short