@@ -19007,9 +19007,10 @@ expand_vec_perm_blend (struct expand_vec_perm_d *d)
;
else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
;
- else if (TARGET_SSE4_1 && (GET_MODE_SIZE (vmode) == 16
- || GET_MODE_SIZE (vmode) == 8
- || GET_MODE_SIZE (vmode) == 4))
+ else if (TARGET_SSE4_1
+ && (GET_MODE_SIZE (vmode) == 16
+ || (TARGET_MMX_WITH_SSE && GET_MODE_SIZE (vmode) == 8)
+ || GET_MODE_SIZE (vmode) == 4))
;
else
return false;
@@ -19042,6 +19043,8 @@ expand_vec_perm_blend (struct expand_vec_perm_d *d)
case E_V8SFmode:
case E_V2DFmode:
case E_V4SFmode:
+ case E_V2SFmode:
+ case E_V2HImode:
case E_V4HImode:
case E_V8HImode:
case E_V8SImode:
@@ -19897,11 +19900,15 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
}
}
+ /* Try the SSE4.1 blend variable merge instructions. */
+ if (expand_vec_perm_blend (d))
+ return true;
+
/* Try movss/movsd instructions. */
if (expand_vec_perm_movs (d))
return true;
- /* Finally, try the fully general two operand permute. */
+ /* Try the fully general two operand permute. */
if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
d->testing_p))
return true;
@@ -19924,10 +19931,6 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
return true;
}
- /* Try the SSE4.1 blend variable merge instructions. */
- if (expand_vec_perm_blend (d))
- return true;
-
/* Try one of the AVX vpermil variable permutations. */
if (expand_vec_perm_vpermil (d))
return true;
@@ -1154,6 +1154,25 @@ (define_expand "vcond<mode>v2sf"
DONE;
})
+(define_insn "*mmx_blendps" ;; vec_merge of two V2SF registers under mask operand 3, emitted as (v)blendps.
+ [(set (match_operand:V2SF 0 "register_operand" "=Yr,*x,x")
+ (vec_merge:V2SF
+ (match_operand:V2SF 2 "register_operand" "Yr,*x,x")
+ (match_operand:V2SF 1 "register_operand" "0,0,x") ;; "0" ties operand 1 to the destination in the first two alternatives.
+ (match_operand:SI 3 "const_0_to_3_operand")))]
+ "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE" ;; Pattern is available only when both target conditions hold.
+ "@
+ blendps\t{%3, %2, %0|%0, %2, %3}
+ blendps\t{%3, %2, %0|%0, %2, %3}
+ vblendps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "isa" "noavx,noavx,avx") ;; Alternatives 0-1: two-operand blendps; alternative 2: three-operand vblendps.
+ (set_attr "type" "ssemov")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix_data16" "1,1,*")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "orig,orig,vex")
+ (set_attr "mode" "V4SF")]) ;; NOTE(review): mode is V4SF although the operands are V2SF; presumably the insn acts on the full XMM register - confirm.
+
(define_insn "mmx_blendvps"
[(set (match_operand:V2SF 0 "register_operand" "=Yr,*x,x")
(unspec:V2SF
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O1 -msse2" } */
+/* { dg-options "-O1 -msse2 -mno-sse4" } */
#include <x86intrin.h>
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -msse" } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
typedef unsigned int v4si __attribute__((vector_size(16)));
typedef float v4sf __attribute__((vector_size(16)));
@@ -7,7 +7,7 @@ typedef float v4sf __attribute__((vector_size(16)));
v4si foo(v4si x,v4si y) { return (v4si){y[0],x[1],x[2],x[3]}; }
v4sf bar(v4sf x,v4sf y) { return (v4sf){y[0],x[1],x[2],x[3]}; }
-/* { dg-final { scan-assembler-times "\tv?movss\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tmovss\t" 2 } } */
/* { dg-final { scan-assembler-not "movaps" } } */
/* { dg-final { scan-assembler-not "shufps" } } */
-/* { dg-final { scan-assembler-not "vpblendw" } } */
+/* { dg-final { scan-assembler-not "pblendw" } } */
@@ -1,5 +1,5 @@
/* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-options "-O2 -msse2 -mno-mmx -mno-sse4" } */
/* { dg-final { scan-assembler-times "pshufd" 1 } } */
/* { dg-final { scan-assembler-times "movd" 1 } } */
/* { dg-final { scan-assembler-not "%mm" } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-sse4" } */
+/* With SSE4 disabled, the two 8-byte merges of y[0] with x[1] below must emit two movss and no movaps, shufps or pblendw.  */
+typedef unsigned int v2si __attribute__((vector_size(8)));
+typedef float v2sf __attribute__((vector_size(8)));
+
+v2si foo(v2si x,v2si y) { return (v2si){y[0],x[1]}; }
+v2sf bar(v2sf x,v2sf y) { return (v2sf){y[0],x[1]}; }
+
+/* { dg-final { scan-assembler-times "\tmovss\t" 2 } } */
+/* { dg-final { scan-assembler-not "movaps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+/* { dg-final { scan-assembler-not "pblendw" } } */
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -msse2" } */
+/* { dg-options "-O2 -msse2 -mno-sse4" } */
typedef unsigned long long v2di __attribute__((vector_size(16)));
typedef double v2df __attribute__((vector_size(16)));
@@ -7,9 +7,9 @@ typedef double v2df __attribute__((vector_size(16)));
v2di foo(v2di x,v2di y) { return (v2di){y[0],x[1]}; }
v2df bar(v2df x,v2df y) { return (v2df){y[0],x[1]}; }
-/* { dg-final { scan-assembler-times "\tv?movsd\t" 2 } } */
-/* { dg-final { scan-assembler-not "v?shufpd" } } */
+/* { dg-final { scan-assembler-times "\tmovsd\t" 2 } } */
+/* { dg-final { scan-assembler-not "shufpd" } } */
/* { dg-final { scan-assembler-not "movdqa" } } */
/* { dg-final { scan-assembler-not "pshufd" } } */
-/* { dg-final { scan-assembler-not "v?punpckldq" } } */
-/* { dg-final { scan-assembler-not "v?movq" } } */
+/* { dg-final { scan-assembler-not "punpckldq" } } */
+/* { dg-final { scan-assembler-not "movq" } } */