@@ -15592,6 +15592,17 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
}
goto widen;
+ case E_V4HFmode:
+ case E_V4BFmode:
+ if (TARGET_MMX_WITH_SSE)
+ {
+ val = force_reg (GET_MODE_INNER (mode), val);
+ rtx x = gen_rtx_VEC_DUPLICATE (mode, val);
+ emit_insn (gen_rtx_SET (target, x));
+ return true;
+ }
+ return false;
+
case E_V2HImode:
if (TARGET_SSE2)
{
@@ -15605,6 +15616,17 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
}
return false;
+ case E_V2HFmode:
+ case E_V2BFmode:
+ if (TARGET_SSE2)
+ {
+ val = force_reg (GET_MODE_INNER (mode), val);
+ rtx x = gen_rtx_VEC_DUPLICATE (mode, val);
+ emit_insn (gen_rtx_SET (target, x));
+ return true;
+ }
+ return false;
+
case E_V8QImode:
case E_V4QImode:
if (!mmx_ok)
@@ -15815,6 +15837,8 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
use_vector_set = TARGET_MMX_WITH_SSE && TARGET_SSE4_1;
break;
case E_V4HImode:
+ case E_V4HFmode:
+ case E_V4BFmode:
use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
break;
case E_V4QImode:
@@ -16051,6 +16075,8 @@ ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
case E_V4SImode:
case E_V8HImode:
case E_V4HImode:
+ case E_V4HFmode:
+ case E_V4BFmode:
break;
case E_V16QImode:
@@ -16438,6 +16464,7 @@ ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
rtx ops[64], op0, op1, op2, op3, op4, op5;
machine_mode half_mode = VOIDmode;
machine_mode quarter_mode = VOIDmode;
+ machine_mode int_inner_mode = VOIDmode;
int n, i;
switch (mode)
@@ -16582,6 +16609,13 @@ quarter:
ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
return;
+ case E_V4HFmode:
+ case E_V4BFmode:
+ case E_V2HFmode:
+ case E_V2BFmode:
+ int_inner_mode = HImode;
+ break;
+
case E_V4HImode:
case E_V8QImode:
@@ -16613,6 +16647,16 @@ quarter:
for (j = 0; j < n_elt_per_word; ++j)
{
rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
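+	      /* For HF/BF elements, reinterpret the value as HImode first
+		 so that the convert_modes call below zero-extends the raw
+		 bit pattern instead of converting the FP value.  */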
+ if (int_inner_mode != E_VOIDmode)
+ {
+ gcc_assert (TARGET_SSE2 && int_inner_mode == HImode);
+ rtx tmp = gen_reg_rtx (int_inner_mode);
+ elt = lowpart_subreg (int_inner_mode,
+ force_reg (inner_mode, elt),
+ inner_mode);
+ emit_move_insn (tmp, elt);
+ elt = tmp;
+ }
elt = convert_modes (tmp_mode, inner_mode, elt, true);
if (j == 0)
@@ -16839,6 +16883,14 @@ ix86_expand_vector_set_var (rtx target, rtx val, rtx idx)
case E_V16SFmode:
cmp_mode = V16SImode;
break;
+ case E_V2HFmode:
+ case E_V2BFmode:
+ cmp_mode = V2HImode;
+ break;
+ case E_V4HFmode:
+ case E_V4BFmode:
+ cmp_mode = V4HImode;
+ break;
case E_V8HFmode:
cmp_mode = V8HImode;
break;
@@ -17085,9 +17137,13 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
case E_V8HFmode:
case E_V8BFmode:
case E_V2HImode:
+ case E_V2HFmode:
+ case E_V2BFmode:
use_vec_merge = TARGET_SSE2;
break;
case E_V4HImode:
+ case E_V4HFmode:
+ case E_V4BFmode:
use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
break;
@@ -17428,9 +17484,13 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
case E_V8HFmode:
case E_V8BFmode:
case E_V2HImode:
+ case E_V2HFmode:
+ case E_V2BFmode:
use_vec_extr = TARGET_SSE2;
break;
case E_V4HImode:
+ case E_V4HFmode:
+ case E_V4BFmode:
use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
break;
@@ -112,11 +112,21 @@ (define_mode_attr mmxintvecmodelower
;; Mapping of vector modes to a vector mode of double size
(define_mode_attr mmxdoublevecmode
- [(V2SF "V4SF") (V2SI "V4SI") (V4HF "V8HF") (V4HI "V8HI")])
+ [(V2SF "V4SF") (V2SI "V4SI") (V4HF "V8HF") (V4HI "V8HI")
+ (V2HI "V4HI") (V2HF "V4HF") (V2BF "V4BF")])
;; Mapping of vector modes back to the scalar modes
(define_mode_attr mmxscalarmode
- [(V2SI "SI") (V2SF "SF")])
+ [(V2SI "SI") (V2SF "SF")
+ (V4HF "HF") (V4BF "BF")
+ (V2HF "HF") (V2BF "BF")
+ (V4HI "HI") (V2HI "HI")])
+
+(define_mode_attr mmxscalarmodelower
+ [(V2SI "si") (V2SF "sf")
+ (V4HF "hf") (V4BF "bf")
+ (V2HF "hf") (V2BF "bf")
+ (V4HI "hi") (V2HI "hi")])
(define_mode_attr Yv_Yw
[(V8QI "Yw") (V4HI "Yw") (V2SI "Yv") (V1DI "Yv") (V2SF "Yv")])
@@ -4882,11 +4892,11 @@ (define_insn "*mmx_pinsrd"
(set_attr "mode" "TI")])
(define_insn "*mmx_pinsrw"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,YW")
- (vec_merge:V4HI
- (vec_duplicate:V4HI
- (match_operand:HI 2 "nonimmediate_operand" "rm,rm,rm"))
- (match_operand:V4HI 1 "register_operand" "0,0,YW")
+ [(set (match_operand:V4FI_64 0 "register_operand" "=y,x,YW,&x")
+ (vec_merge:V4FI_64
+ (vec_duplicate:V4FI_64
+ (match_operand:<mmxscalarmode> 2 "nonimmediate_operand" "rm,rm,rm,x"))
+ (match_operand:V4FI_64 1 "register_operand" "0,0,YW,x")
(match_operand:SI 3 "const_int_operand")))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)
@@ -4896,6 +4906,8 @@ (define_insn "*mmx_pinsrw"
operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
switch (which_alternative)
{
+ case 3:
+ return "#";
case 2:
if (MEM_P (operands[2]))
return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
@@ -4911,11 +4923,28 @@ (define_insn "*mmx_pinsrw"
gcc_unreachable ();
}
}
- [(set_attr "isa" "*,sse2_noavx,avx")
- (set_attr "mmx_isa" "native,*,*")
- (set_attr "type" "mmxcvt,sselog,sselog")
+ [(set_attr "isa" "*,sse2_noavx,avx,sse4")
+ (set_attr "mmx_isa" "native,*,*,*")
+ (set_attr "type" "mmxcvt,sselog,sselog,sselog")
(set_attr "length_immediate" "1")
- (set_attr "mode" "DI,TI,TI")])
+ (set_attr "mode" "DI,TI,TI,TI")])
+
+;; For TARGET_SSE4_1, implement insert from XMM reg with PSHUFLW + PBLENDW.
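+;; E.g. inserting the scalar held in %xmm2 into lane 2 of the vector in
+;; %xmm1 is expected to split into roughly (non-AVX forms, registers
+;; illustrative):
+;;	pshuflw	$0, %xmm2, %xmm0
+;;	pblendw	$0xb, %xmm1, %xmm0
+;; where the blend immediate is the complement of the original merge mask.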
+(define_split
+ [(set (match_operand:V4FI_64 0 "sse_reg_operand")
+ (vec_merge:V4FI_64
+ (vec_duplicate:V4FI_64
+ (match_operand:<mmxscalarmode> 2 "sse_reg_operand"))
+ (match_operand:V4FI_64 1 "sse_reg_operand")
+ (match_operand:SI 3 "const_int_operand")))]
+ "TARGET_MMX_WITH_SSE && TARGET_SSE4_1 && reload_completed
+ && ((unsigned) exact_log2 (INTVAL (operands[3]))
+ < GET_MODE_NUNITS (<MODE>mode))"
+ [(set (match_dup 0)
+ (vec_duplicate:V4FI_64 (match_dup 2)))
+ (set (match_dup 0)
+ (vec_merge:V4FI_64 (match_dup 1) (match_dup 0) (match_dup 3)))]
+ "operands[3] = GEN_INT (~INTVAL (operands[3]) & 0xf);")
(define_insn "*mmx_pinsrb"
[(set (match_operand:V8QI 0 "register_operand" "=x,YW")
@@ -4973,6 +5002,41 @@ (define_insn "*mmx_pextrw"
(set_attr "prefix" "orig,maybe_vex,maybe_vex,maybe_evex")
(set_attr "mode" "DI,TI,TI,TI")])
+(define_insn "*mmx_pextrw<mode>"
+ [(set (match_operand:<mmxscalarmode> 0 "register_sse4nonimm_operand" "=?r,?r,jm,m,x,Yw")
+ (vec_select:<mmxscalarmode>
+ (match_operand:V4F_64 1 "register_operand" "y,YW,YW,YW,0,YW")
+ (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}";
+ case 2:
+ case 3:
+ return "%vpextrw\t{%2, %1, %0|%0, %1, %2}";
+ case 4:
+ operands[2] = GEN_INT (INTVAL (operands[2]) * 2);
+ return "psrldq\t{%2, %0|%0, %2}";
+ case 5:
+ operands[2] = GEN_INT (INTVAL (operands[2]) * 2);
+ return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "isa" "*,sse2,sse4_noavx,avx,noavx,avx")
+ (set_attr "addr" "*,*,gpr16,*,*,*")
+ (set_attr "mmx_isa" "native,*,*,*,*,*")
+ (set_attr "type" "mmxcvt,sselog1,sselog1,sselog1,sseishft1,sseishft1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "orig,maybe_vex,maybe_vex,maybe_evex,orig,maybe_evex")
+ (set_attr "mode" "DI,TI,TI,TI,TI,TI")])
+
(define_insn "*mmx_pextrw_zext"
[(set (match_operand:SWI48 0 "register_operand" "=r,r")
(zero_extend:SWI48
@@ -5069,18 +5133,18 @@ (define_expand "mmx_pshufw"
&& (TARGET_SSE || TARGET_3DNOW_A)"
{
int mask = INTVAL (operands[2]);
- emit_insn (gen_mmx_pshufw_1 (operands[0], operands[1],
- GEN_INT ((mask >> 0) & 3),
- GEN_INT ((mask >> 2) & 3),
- GEN_INT ((mask >> 4) & 3),
- GEN_INT ((mask >> 6) & 3)));
+ emit_insn (gen_mmx_pshufwv4hi_1 (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3)));
DONE;
})
-(define_insn "mmx_pshufw_1"
- [(set (match_operand:V4HI 0 "register_operand" "=y,Yw")
- (vec_select:V4HI
- (match_operand:V4HI 1 "register_mmxmem_operand" "ym,Yw")
+(define_insn "mmx_pshufw<mode>_1"
+ [(set (match_operand:V4FI_64 0 "register_operand" "=y,Yw")
+ (vec_select:V4FI_64
+ (match_operand:V4FI_64 1 "register_mmxmem_operand" "ym,Yw")
(parallel [(match_operand 2 "const_0_to_3_operand")
(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")
@@ -5134,10 +5198,10 @@ (define_insn "*mmx_pshufd_1"
(set_attr "mode" "TI")])
(define_insn "*mmx_pblendw64"
- [(set (match_operand:V4HI 0 "register_operand" "=Yr,*x,x")
- (vec_merge:V4HI
- (match_operand:V4HI 2 "register_operand" "Yr,*x,x")
- (match_operand:V4HI 1 "register_operand" "0,0,x")
+ [(set (match_operand:V4FI_64 0 "register_operand" "=Yr,*x,x")
+ (vec_merge:V4FI_64
+ (match_operand:V4FI_64 2 "register_operand" "Yr,*x,x")
+ (match_operand:V4FI_64 1 "register_operand" "0,0,x")
(match_operand:SI 3 "const_0_to_15_operand")))]
"TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
"@
@@ -5152,10 +5216,10 @@ (define_insn "*mmx_pblendw64"
(set_attr "mode" "TI")])
(define_insn "*mmx_pblendw32"
- [(set (match_operand:V2HI 0 "register_operand" "=Yr,*x,x")
- (vec_merge:V2HI
- (match_operand:V2HI 2 "register_operand" "Yr,*x,x")
- (match_operand:V2HI 1 "register_operand" "0,0,x")
+ [(set (match_operand:V2FI_32 0 "register_operand" "=Yr,*x,x")
+ (vec_merge:V2FI_32
+ (match_operand:V2FI_32 2 "register_operand" "Yr,*x,x")
+ (match_operand:V2FI_32 1 "register_operand" "0,0,x")
(match_operand:SI 3 "const_0_to_7_operand")))]
"TARGET_SSE4_1"
"@
@@ -5212,6 +5276,16 @@ (define_insn "*vec_dupv4hi"
(set_attr "length_immediate" "1")
(set_attr "mode" "DI,TI")])
+(define_insn "*vec_dup<mode>"
+ [(set (match_operand:V4F_64 0 "register_operand" "=Yw")
+ (vec_duplicate:V4F_64
+ (match_operand:<mmxscalarmode> 1 "register_operand" "Yw")))]
+ "TARGET_MMX_WITH_SSE"
+ "%vpshuflw\t{$0, %1, %0|%0, %1, 0}"
+ [(set_attr "isa" "sse2")
+ (set_attr "type" "sselog1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
(define_insn "*vec_dupv2si"
[(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
@@ -5405,9 +5479,9 @@ (define_expand "vec_initv2sisi"
DONE;
})
-(define_expand "vec_setv4hi"
- [(match_operand:V4HI 0 "register_operand")
- (match_operand:HI 1 "register_operand")
+(define_expand "vec_set<mode>"
+ [(match_operand:V4FI_64 0 "register_operand")
+ (match_operand:<mmxscalarmode> 1 "register_operand")
(match_operand 2 "vec_setm_mmx_operand")]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
{
@@ -5419,9 +5493,9 @@ (define_expand "vec_setv4hi"
DONE;
})
-(define_expand "vec_extractv4hihi"
- [(match_operand:HI 0 "register_operand")
- (match_operand:V4HI 1 "register_operand")
+(define_expand "vec_extract<mode><mmxscalarmodelower>"
+ [(match_operand:<mmxscalarmode> 0 "register_operand")
+ (match_operand:V4FI_64 1 "register_operand")
(match_operand 2 "const_int_operand")]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
{
@@ -5440,6 +5514,16 @@ (define_expand "vec_initv4hihi"
DONE;
})
+(define_expand "vec_init<mode><mmxscalarmodelower>"
+ [(match_operand:V4F_64 0 "register_operand")
+ (match_operand 1)]
+ "TARGET_MMX_WITH_SSE"
+{
+ ix86_expand_vector_init (TARGET_MMX_WITH_SSE, operands[0],
+ operands[1]);
+ DONE;
+})
+
(define_expand "vec_setv8qi"
[(match_operand:V8QI 0 "register_operand")
(match_operand:QI 1 "register_operand")
@@ -5476,11 +5560,11 @@ (define_expand "vec_initv8qiqi"
})
(define_insn "*pinsrw"
- [(set (match_operand:V2HI 0 "register_operand" "=x,YW")
- (vec_merge:V2HI
- (vec_duplicate:V2HI
- (match_operand:HI 2 "nonimmediate_operand" "rm,rm"))
- (match_operand:V2HI 1 "register_operand" "0,YW")
+ [(set (match_operand:V2FI_32 0 "register_operand" "=x,YW,&x")
+ (vec_merge:V2FI_32
+ (vec_duplicate:V2FI_32
+ (match_operand:<mmxscalarmode> 2 "nonimmediate_operand" "rm,rm,x"))
+ (match_operand:V2FI_32 1 "register_operand" "0,YW,x")
(match_operand:SI 3 "const_int_operand")))]
"TARGET_SSE2
&& ((unsigned) exact_log2 (INTVAL (operands[3]))
@@ -5489,6 +5573,8 @@ (define_insn "*pinsrw"
operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
switch (which_alternative)
{
+ case 2:
+ return "#";
case 1:
if (MEM_P (operands[2]))
return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
@@ -5503,11 +5589,29 @@ (define_insn "*pinsrw"
gcc_unreachable ();
}
}
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,avx,sse4")
(set_attr "type" "sselog")
(set_attr "length_immediate" "1")
(set_attr "mode" "TI")])
+;; For TARGET_SSE4_1, implement insert from XMM reg with PSHUFLW + PBLENDW.
+(define_split
+ [(set (match_operand:V2FI_32 0 "sse_reg_operand")
+ (vec_merge:V2FI_32
+ (vec_duplicate:V2FI_32
+ (match_operand:<mmxscalarmode> 2 "sse_reg_operand"))
+ (match_operand:V2FI_32 1 "sse_reg_operand")
+ (match_operand:SI 3 "const_int_operand")))]
+ "TARGET_SSE4_1 && reload_completed
+ && ((unsigned) exact_log2 (INTVAL (operands[3]))
+ < GET_MODE_NUNITS (<MODE>mode))"
+ [(set (match_dup 0)
+ (vec_duplicate:V2FI_32 (match_dup 2)))
+ (set (match_dup 0)
+ (vec_merge:V2FI_32 (match_dup 1) (match_dup 0) (match_dup 3)))]
+ "operands[3] = GEN_INT (~INTVAL (operands[3]) & 0x3);")
+
(define_insn "*pinsrb"
[(set (match_operand:V4QI 0 "register_operand" "=x,YW")
(vec_merge:V4QI
@@ -5561,6 +5665,39 @@ (define_insn "*pextrw"
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
+(define_insn "*pextrw<mode>"
+ [(set (match_operand:<mmxscalarmode> 0 "register_sse4nonimm_operand" "=?r,jm,m,x,Yw")
+ (vec_select:<mmxscalarmode>
+ (match_operand:V2F_32 1 "register_operand" "YW,YW,YW,0,YW")
+ (parallel [(match_operand:SI 2 "const_0_to_1_operand")])))]
+ "TARGET_SSE2"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}";
+ case 1:
+ return "pextrw\t{%2, %1, %0|%0, %1, %2}";
+ case 2:
+ return "vpextrw\t{%2, %1, %0|%0, %1, %2}";
+ case 3:
+ operands[2] = GEN_INT (INTVAL (operands[2]) * 2);
+ return "psrldq\t{%2, %0|%0, %2}";
+ case 4:
+ operands[2] = GEN_INT (INTVAL (operands[2]) * 2);
+ return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "isa" "*,sse4_noavx,avx,noavx,avx")
+ (set_attr "addr" "*,gpr16,*,*,*")
+ (set_attr "type" "sselog1,sselog1,sselog1,sseishft1,sseishft1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "maybe_vex,orig,maybe_evex,orig,maybe_evex")
+ (set_attr "mode" "TI")])
+
(define_insn "*pextrw_zext"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(zero_extend:SWI48
@@ -5608,9 +5745,9 @@ (define_insn "*pextrb_zext"
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
-(define_expand "vec_setv2hi"
- [(match_operand:V2HI 0 "register_operand")
- (match_operand:HI 1 "register_operand")
+(define_expand "vec_set<mode>"
+ [(match_operand:V2FI_32 0 "register_operand")
+ (match_operand:<mmxscalarmode> 1 "register_operand")
(match_operand 2 "vec_setm_sse41_operand")]
"TARGET_SSE2"
{
@@ -5622,9 +5759,9 @@ (define_expand "vec_setv2hi"
DONE;
})
-(define_expand "vec_extractv2hihi"
- [(match_operand:HI 0 "register_operand")
- (match_operand:V2HI 1 "register_operand")
+(define_expand "vec_extract<mode><mmxscalarmodelower>"
+ [(match_operand:<mmxscalarmode> 0 "register_operand")
+ (match_operand:V2FI_32 1 "register_operand")
(match_operand 2 "const_int_operand")]
"TARGET_SSE2"
{
@@ -5659,29 +5796,29 @@ (define_expand "vec_extractv4qiqi"
})
(define_insn_and_split "*punpckwd"
- [(set (match_operand:V2HI 0 "register_operand" "=x,Yw")
- (vec_select:V2HI
- (vec_concat:V4HI
- (match_operand:V2HI 1 "register_operand" "0,Yw")
- (match_operand:V2HI 2 "register_operand" "x,Yw"))
+ [(set (match_operand:V2FI_32 0 "register_operand" "=x,Yw")
+ (vec_select:V2FI_32
+ (vec_concat:<mmxdoublevecmode>
+ (match_operand:V2FI_32 1 "register_operand" "0,Yw")
+ (match_operand:V2FI_32 2 "register_operand" "x,Yw"))
(parallel [(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")])))]
"TARGET_SSE2"
"#"
"&& reload_completed"
[(set (match_dup 5)
- (vec_select:V8HI
+ (vec_select:<mmxxmmmode>
(match_dup 5)
(parallel [(match_dup 3) (match_dup 4)
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)])))]
{
- rtx dest = lowpart_subreg (V8HImode, operands[0], V2HImode);
- rtx op1 = lowpart_subreg (V8HImode, operands[1], V2HImode);
- rtx op2 = lowpart_subreg (V8HImode, operands[2], V2HImode);
+ rtx dest = lowpart_subreg (<mmxxmmmode>mode, operands[0], <MODE>mode);
+ rtx op1 = lowpart_subreg (<mmxxmmmode>mode, operands[1], <MODE>mode);
+ rtx op2 = lowpart_subreg (<mmxxmmmode>mode, operands[2], <MODE>mode);
- emit_insn (gen_vec_interleave_lowv8hi (dest, op1, op2));
+ emit_insn (gen_vec_interleave_low<mmxxmmmodelower> (dest, op1, op2));
static const int map[4] = { 0, 2, 1, 3 };
@@ -5699,10 +5836,10 @@ (define_insn_and_split "*punpckwd"
(set_attr "type" "sselog")
(set_attr "mode" "TI")])
-(define_insn "*pshufw_1"
- [(set (match_operand:V2HI 0 "register_operand" "=Yw")
- (vec_select:V2HI
- (match_operand:V2HI 1 "register_operand" "Yw")
+(define_insn "*pshufw<mode>_1"
+ [(set (match_operand:V2FI_32 0 "register_operand" "=Yw")
+ (vec_select:V2FI_32
+ (match_operand:V2FI_32 1 "register_operand" "Yw")
(parallel [(match_operand 2 "const_0_to_1_operand")
(match_operand 3 "const_0_to_1_operand")])))]
"TARGET_SSE2"
@@ -5731,8 +5868,18 @@ (define_insn "*vec_dupv2hi"
(set_attr "length_immediate" "1")
(set_attr "mode" "TI")])
-(define_expand "vec_initv2hihi"
- [(match_operand:V2HI 0 "register_operand")
+(define_insn "*vec_dup<mode>"
+ [(set (match_operand:V2F_32 0 "register_operand" "=Yw")
+ (vec_duplicate:V2F_32
+ (match_operand:<mmxscalarmode> 1 "register_operand" "Yw")))]
+ "TARGET_SSE2"
+ "%vpshuflw\t{$0, %1, %0|%0, %1, 0}"
+ [(set_attr "type" "sselog1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "vec_init<mode><mmxscalarmodelower>"
+ [(match_operand:V2FI_32 0 "register_operand")
(match_operand 1)]
"TARGET_SSE2"
{
@@ -12372,9 +12372,9 @@ (define_insn_and_split "*vec_extract<mode>_0"
"operands[1] = gen_lowpart (<ssescalarmode>mode, operands[1]);")
(define_insn "*vec_extract<mode>"
- [(set (match_operand:HFBF 0 "register_sse4nonimm_operand" "=?r,jm,m,x,v")
+ [(set (match_operand:HFBF 0 "register_sse4nonimm_operand" "=?r,jm,m,x,Yw")
(vec_select:HFBF
- (match_operand:<ssevecmode> 1 "register_operand" "v,x,v,0,v")
+ (match_operand:<ssevecmode> 1 "register_operand" "v,x,v,0,YW")
(parallel
[(match_operand:SI 2 "const_0_to_7_operand")])))]
"TARGET_SSE2"
new file mode 100644
@@ -0,0 +1,135 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O1 -msse4.1" } */
+/* { dg-require-effective-target sse4 } */
+
+#include "sse4_1-check.h"
+
+typedef _Float16 v4hf __attribute__((vector_size(8)));
+
+v4hf
+__attribute__((noipa))
+vector_init_dupv4hf (_Float16 a)
+{
+ return __extension__(v4hf){a, a, a, a};
+}
+
+v4hf
+__attribute__((noipa))
+vector_init_allzero (_Float16 a)
+{
+ return __extension__(v4hf){0, 0, 0, 0};
+}
+
+v4hf
+__attribute__((noipa))
+vector_init_one_nonzero (_Float16 a)
+{
+ return __extension__(v4hf){0, 0, a, 0};
+}
+
+v4hf
+__attribute__((noipa))
+vector_init_one_var (_Float16 a)
+{
+ return __extension__(v4hf){1, 2, a, 4};
+}
+
+v4hf
+__attribute__((noipa))
+vector_init_general (_Float16 a, _Float16 a1, _Float16 a2, _Float16 a3)
+{
+ return __extension__(v4hf){a3, a2, a1, a};
+}
+
+v4hf
+__attribute__((noipa))
+vec_set (_Float16 a, v4hf b)
+{
+ b[1] = a;
+ return b;
+}
+
+v4hf
+__attribute__((noipa))
+vec_set_var (_Float16 a, v4hf b, int c)
+{
+ b[c] = a;
+ return b;
+}
+
+_Float16
+__attribute__((noipa))
+vec_extract (v4hf b)
+{
+ return b[2];
+}
+
+static void
+sse4_1_test ()
+{
+  typedef union {
+    _Float16 a[4];
+    v4hf x;
+  } union64hf;
+ union64hf res, exp, src;
+
+ res.x = vector_init_dupv4hf (1.0f16);
+ for (int i = 0; i != 4; i++)
+ exp.a[i] = 1.0f16;
+ if (__builtin_memcmp (&res.a[0], &exp.a[0], 8) != 0)
+ __builtin_abort ();
+
+ res.x = vector_init_allzero (1.0f16);
+ for (int i = 0; i != 4; i++)
+ exp.a[i] = 0.0f16;
+ if (__builtin_memcmp (&res.a[0], &exp.a[0], 8) != 0)
+ __builtin_abort ();
+
+ res.x = vector_init_one_nonzero (1.0f16);
+ for (int i = 0; i != 4; i++)
+ exp.a[i] = 0.0f16;
+ exp.a[2] = 1.0f16;
+ if (__builtin_memcmp (&res.a[0], &exp.a[0], 8) != 0)
+ __builtin_abort ();
+
+ res.x = vector_init_one_var (3.0f16);
+ for (int i = 0; i != 4; i++)
+ exp.a[i] = i + 1;
+ if (__builtin_memcmp (&res.a[0], &exp.a[0], 8) != 0)
+ __builtin_abort ();
+
+ res.x = vector_init_general (4.0, 3.0f, 2.0f, 1.0);
+ for (int i = 0; i != 4; i++)
+ exp.a[i] = 1 + i;
+ if (__builtin_memcmp (&res.a[0], &exp.a[0], 8) != 0)
+ __builtin_abort ();
+
+ for (int i = 0; i != 4; i++)
+ {
+ src.a[i] = i;
+ exp.a[i] = i;
+ }
+ res.x = vec_set (3.0f, src.x);
+ exp.a[1] = 3.0f;
+ if (__builtin_memcmp (&res.a[0], &exp.a[0], 8) != 0)
+ __builtin_abort ();
+
+ for (int i = 0; i != 4; i++)
+ {
+ src.a[i] = i;
+ exp.a[i] = i;
+ }
+ res.x = vec_set_var (3.0f, src.x, 1);
+ exp.a[1] = 3.0f;
+ if (__builtin_memcmp (&res.a[0], &exp.a[0], 8) != 0)
+ __builtin_abort ();
+
+ for (int i = 0; i != 4; i++)
+ {
+ src.a[i] = i;
+ exp.a[i] = i;
+ }
+ _Float16 res_scalar = vec_extract (src.x);
+ if (res_scalar != 2.0f)
+ __builtin_abort ();
+ return ;
+}
new file mode 100644
@@ -0,0 +1,135 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O1 -msse4.1" } */
+/* { dg-require-effective-target sse4 } */
+
+#include "sse4_1-check.h"
+
+typedef _Float16 v2hf __attribute__((vector_size(4)));
+
+v2hf
+__attribute__((noipa))
+vector_init_dupv2hf (_Float16 a)
+{
+ return __extension__(v2hf){a, a};
+}
+
+v2hf
+__attribute__((noipa))
+vector_init_allzero (_Float16 a)
+{
+ return __extension__(v2hf){0, 0};
+}
+
+v2hf
+__attribute__((noipa))
+vector_init_one_nonzero (_Float16 a)
+{
+ return __extension__(v2hf){0, a};
+}
+
+v2hf
+__attribute__((noipa))
+vector_init_one_var (_Float16 a)
+{
+ return __extension__(v2hf){1, a};
+}
+
+v2hf
+__attribute__((noipa))
+vector_init_general (_Float16 a1, _Float16 a2)
+{
+ return __extension__(v2hf){a2, a1};
+}
+
+v2hf
+__attribute__((noipa))
+vec_set (_Float16 a, v2hf b)
+{
+ b[1] = a;
+ return b;
+}
+
+v2hf
+__attribute__((noipa))
+vec_set_var (_Float16 a, v2hf b, int c)
+{
+ b[c] = a;
+ return b;
+}
+
+_Float16
+__attribute__((noipa))
+vec_extract (v2hf b)
+{
+ return b[1];
+}
+
+static void
+sse4_1_test ()
+{
+  typedef union {
+    _Float16 a[2];
+    v2hf x;
+  } union32hf;
+  union32hf res, exp, src;
+
+ res.x = vector_init_dupv2hf (1.0f16);
+ for (int i = 0; i != 2; i++)
+ exp.a[i] = 1.0f16;
+ if (__builtin_memcmp (&res.a[0], &exp.a[0], 4) != 0)
+ __builtin_abort ();
+
+ res.x = vector_init_allzero (1.0f16);
+ for (int i = 0; i != 2; i++)
+ exp.a[i] = 0.0f16;
+ if (__builtin_memcmp (&res.a[0], &exp.a[0], 4) != 0)
+ __builtin_abort ();
+
+ res.x = vector_init_one_nonzero (1.0f16);
+ for (int i = 0; i != 2; i++)
+ exp.a[i] = 0.0f16;
+ exp.a[1] = 1.0f16;
+ if (__builtin_memcmp (&res.a[0], &exp.a[0], 4) != 0)
+ __builtin_abort ();
+
+ res.x = vector_init_one_var (3.0f16);
+ exp.a[0] = 1;
+ exp.a[1] = 3;
+ if (__builtin_memcmp (&res.a[0], &exp.a[0], 4) != 0)
+ __builtin_abort ();
+
+ res.x = vector_init_general (2.0f, 1.0);
+ for (int i = 0; i != 2; i++)
+ exp.a[i] = 1 + i;
+ if (__builtin_memcmp (&res.a[0], &exp.a[0], 4) != 0)
+ __builtin_abort ();
+
+ for (int i = 0; i != 2; i++)
+ {
+ src.a[i] = i;
+ exp.a[i] = i;
+ }
+ res.x = vec_set (3.0f, src.x);
+ exp.a[1] = 3.0f;
+ if (__builtin_memcmp (&res.a[0], &exp.a[0], 4) != 0)
+ __builtin_abort ();
+
+ for (int i = 0; i != 2; i++)
+ {
+ src.a[i] = i;
+ exp.a[i] = i;
+ }
+ res.x = vec_set_var (3.0f, src.x, 1);
+ exp.a[1] = 3.0f;
+ if (__builtin_memcmp (&res.a[0], &exp.a[0], 4) != 0)
+ __builtin_abort ();
+
+ for (int i = 0; i != 2; i++)
+ {
+ src.a[i] = i;
+ exp.a[i] = i;
+ }
+ _Float16 res_scalar = vec_extract (src.x);
+ if (res_scalar != 1.0f)
+ __builtin_abort ();
+ return ;
+}