Fix wrong code due to incorrect define_split
Checks
Commit Message
-(define_split
- [(set (match_operand:V2HI 0 "register_operand")
- (eq:V2HI
- (eq:V2HI
- (us_minus:V2HI
- (match_operand:V2HI 1 "register_operand")
- (match_operand:V2HI 2 "register_operand"))
- (match_operand:V2HI 3 "const0_operand"))
- (match_operand:V2HI 4 "const0_operand")))]
- "TARGET_SSE4_1"
- [(set (match_dup 0)
- (umin:V2HI (match_dup 1) (match_dup 2)))
- (set (match_dup 0)
- (eq:V2HI (match_dup 0) (match_dup 2)))])
The splitter is wrong when op1 == op2 (the original pattern returns 0, but after the split it returns 1).
So remove the splitter.
Also extend another define_split to define_insn_and_split to handle
the pattern below:
494(set (reg:V4QI 112)
495 (unspec:V4QI [
496 (subreg:V4QI (reg:V2HF 111 [ bf ]) 0)
497 (subreg:V4QI (reg:V2HF 110 [ af ]) 0)
498 (subreg:V4QI (eq:V2HI (eq:V2HI (reg:V2HI 105)
499 (const_vector:V2HI [
500 (const_int 0 [0]) repeated x2
501 ]))
502 (const_vector:V2HI [
503 (const_int 0 [0]) repeated x2
504 ])) 0)
505 ] UNSPEC_BLENDV))
A plain define_split doesn't work here since pass_combine assumes a split
produces at most 2 insns, but this one produces 3 because const0_rtx
(V2HImode) must first be moved into a register. The move insn can be eliminated later.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready to push to trunk.
gcc/ChangeLog:
PR target/112276
* config/i386/mmx.md (*mmx_pblendvb_v8qi_1): Change
define_split to define_insn_and_split to handle
immediate_operand for comparison.
(*mmx_pblendvb_v8qi_2): Ditto.
(*mmx_pblendvb_<mode>_1): Ditto.
(*mmx_pblendvb_v4qi_2): Ditto.
(<code><mode>3): Remove define_split after it.
(<code>v8qi3): Ditto.
(<code><mode>3): Ditto.
(<code>v2hi3): Ditto.
gcc/testsuite/ChangeLog:
* g++.target/i386/part-vect-vcondhf.C: Adjust testcase.
* gcc.target/i386/pr112276.c: New test.
---
gcc/config/i386/mmx.md | 112 ++++++------------
.../g++.target/i386/part-vect-vcondhf.C | 1 -
gcc/testsuite/gcc.target/i386/pr112276.c | 36 ++++++
3 files changed, 70 insertions(+), 79 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr112276.c
@@ -3360,21 +3360,6 @@ (define_insn "<code><mode>3"
(set_attr "prefix" "orig,orig,vex")
(set_attr "mode" "TI")])
-(define_split
- [(set (match_operand:V4HI 0 "register_operand")
- (eq:V4HI
- (eq:V4HI
- (us_minus:V4HI
- (match_operand:V4HI 1 "register_operand")
- (match_operand:V4HI 2 "register_operand"))
- (match_operand:V4HI 3 "const0_operand"))
- (match_operand:V4HI 4 "const0_operand")))]
- "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
- [(set (match_dup 0)
- (umin:V4HI (match_dup 1) (match_dup 2)))
- (set (match_dup 0)
- (eq:V4HI (match_dup 0) (match_dup 2)))])
-
(define_expand "mmx_<code>v8qi3"
[(set (match_operand:V8QI 0 "register_operand")
(umaxmin:V8QI
@@ -3408,21 +3393,6 @@ (define_expand "<code>v8qi3"
(match_operand:V8QI 2 "register_operand")))]
"TARGET_MMX_WITH_SSE")
-(define_split
- [(set (match_operand:V8QI 0 "register_operand")
- (eq:V8QI
- (eq:V8QI
- (us_minus:V8QI
- (match_operand:V8QI 1 "register_operand")
- (match_operand:V8QI 2 "register_operand"))
- (match_operand:V8QI 3 "const0_operand"))
- (match_operand:V8QI 4 "const0_operand")))]
- "TARGET_MMX_WITH_SSE"
- [(set (match_dup 0)
- (umin:V8QI (match_dup 1) (match_dup 2)))
- (set (match_dup 0)
- (eq:V8QI (match_dup 0) (match_dup 2)))])
-
(define_insn "<code><mode>3"
[(set (match_operand:VI1_16_32 0 "register_operand" "=x,Yw")
(umaxmin:VI1_16_32
@@ -3436,21 +3406,6 @@ (define_insn "<code><mode>3"
(set_attr "type" "sseiadd")
(set_attr "mode" "TI")])
-(define_split
- [(set (match_operand:V4QI 0 "register_operand")
- (eq:V4QI
- (eq:V4QI
- (us_minus:V4QI
- (match_operand:V4QI 1 "register_operand")
- (match_operand:V4QI 2 "register_operand"))
- (match_operand:V4QI 3 "const0_operand"))
- (match_operand:V4QI 4 "const0_operand")))]
- "TARGET_SSE2"
- [(set (match_dup 0)
- (umin:V4QI (match_dup 1) (match_dup 2)))
- (set (match_dup 0)
- (eq:V4QI (match_dup 0) (match_dup 2)))])
-
(define_insn "<code>v2hi3"
[(set (match_operand:V2HI 0 "register_operand" "=Yr,*x,Yv")
(umaxmin:V2HI
@@ -3467,21 +3422,6 @@ (define_insn "<code>v2hi3"
(set_attr "prefix" "orig,orig,vex")
(set_attr "mode" "TI")])
-(define_split
- [(set (match_operand:V2HI 0 "register_operand")
- (eq:V2HI
- (eq:V2HI
- (us_minus:V2HI
- (match_operand:V2HI 1 "register_operand")
- (match_operand:V2HI 2 "register_operand"))
- (match_operand:V2HI 3 "const0_operand"))
- (match_operand:V2HI 4 "const0_operand")))]
- "TARGET_SSE4_1"
- [(set (match_dup 0)
- (umin:V2HI (match_dup 1) (match_dup 2)))
- (set (match_dup 0)
- (eq:V2HI (match_dup 0) (match_dup 2)))])
-
(define_insn "ssse3_abs<mode>2"
[(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yv")
(abs:MMXMODEI
@@ -3954,7 +3894,7 @@ (define_insn "mmx_pblendvb_v8qi"
(set_attr "btver2_decode" "vector")
(set_attr "mode" "TI")])
-(define_split
+(define_insn_and_split "*mmx_pblendvb_v8qi_1"
[(set (match_operand:V8QI 0 "register_operand")
(unspec:V8QI
[(match_operand:V8QI 1 "register_operand")
@@ -3962,21 +3902,26 @@ (define_split
(eq:V8QI
(eq:V8QI
(match_operand:V8QI 3 "register_operand")
- (match_operand:V8QI 4 "register_operand"))
+ (match_operand:V8QI 4 "nonmemory_operand"))
(match_operand:V8QI 5 "const0_operand"))]
UNSPEC_BLENDV))]
- "TARGET_MMX_WITH_SSE"
+ "TARGET_MMX_WITH_SSE && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
[(set (match_dup 6)
- (eq:V8QI (match_dup 3) (match_dup 4)))
+ (eq:V8QI (match_dup 3) (match_dup 7)))
(set (match_dup 0)
(unspec:V8QI
[(match_dup 2)
(match_dup 1)
(match_dup 6)]
UNSPEC_BLENDV))]
- "operands[6] = gen_reg_rtx (V8QImode);")
+{
+ operands[6] = gen_reg_rtx (V8QImode);
+ operands[7] = force_reg (V8QImode, operands[4]);
+})
-(define_split
+(define_insn_and_split "*mmx_pblendvb_v8qi_2"
[(set (match_operand:V8QI 0 "register_operand")
(unspec:V8QI
[(match_operand:V8QI 1 "register_operand")
@@ -3985,12 +3930,14 @@ (define_split
(eq:MMXMODE24
(eq:MMXMODE24
(match_operand:MMXMODE24 3 "register_operand")
- (match_operand:MMXMODE24 4 "register_operand"))
+ (match_operand:MMXMODE24 4 "nonmemory_operand"))
(match_operand:MMXMODE24 5 "const0_operand")) 0)]
UNSPEC_BLENDV))]
- "TARGET_MMX_WITH_SSE"
+ "TARGET_MMX_WITH_SSE && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
[(set (match_dup 6)
- (eq:MMXMODE24 (match_dup 3) (match_dup 4)))
+ (eq:MMXMODE24 (match_dup 3) (match_dup 8)))
(set (match_dup 0)
(unspec:V8QI
[(match_dup 2)
@@ -4000,6 +3947,7 @@ (define_split
{
operands[6] = gen_reg_rtx (<MODE>mode);
operands[7] = lowpart_subreg (V8QImode, operands[6], <MODE>mode);
+ operands[8] = force_reg (<MODE>mode, operands[4]);
})
(define_insn "mmx_pblendvb_<mode>"
@@ -4022,7 +3970,7 @@ (define_insn "mmx_pblendvb_<mode>"
(set_attr "btver2_decode" "vector")
(set_attr "mode" "TI")])
-(define_split
+(define_insn_and_split "*mmx_pblendvb_<mode>_1"
[(set (match_operand:VI_16_32 0 "register_operand")
(unspec:VI_16_32
[(match_operand:VI_16_32 1 "register_operand")
@@ -4030,21 +3978,26 @@ (define_split
(eq:VI_16_32
(eq:VI_16_32
(match_operand:VI_16_32 3 "register_operand")
- (match_operand:VI_16_32 4 "register_operand"))
+ (match_operand:VI_16_32 4 "nonmemory_operand"))
(match_operand:VI_16_32 5 "const0_operand"))]
UNSPEC_BLENDV))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
[(set (match_dup 6)
- (eq:VI_16_32 (match_dup 3) (match_dup 4)))
+ (eq:VI_16_32 (match_dup 3) (match_dup 7)))
(set (match_dup 0)
(unspec:VI_16_32
[(match_dup 2)
(match_dup 1)
(match_dup 6)]
UNSPEC_BLENDV))]
- "operands[6] = gen_reg_rtx (<MODE>mode);")
+{
+ operands[6] = gen_reg_rtx (<MODE>mode);
+ operands[7] = force_reg (<MODE>mode, operands[4]);
+})
-(define_split
+(define_insn_and_split "*mmx_pblendvb_v4qi_2"
[(set (match_operand:V4QI 0 "register_operand")
(unspec:V4QI
[(match_operand:V4QI 1 "register_operand")
@@ -4053,12 +4006,14 @@ (define_split
(eq:V2HI
(eq:V2HI
(match_operand:V2HI 3 "register_operand")
- (match_operand:V2HI 4 "register_operand"))
+ (match_operand:V2HI 4 "nonmemory_operand"))
(match_operand:V2HI 5 "const0_operand")) 0)]
UNSPEC_BLENDV))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
[(set (match_dup 6)
- (eq:V2HI (match_dup 3) (match_dup 4)))
+ (eq:V2HI (match_dup 3) (match_dup 8)))
(set (match_dup 0)
(unspec:V4QI
[(match_dup 2)
@@ -4068,6 +4023,7 @@ (define_split
{
operands[6] = gen_reg_rtx (V2HImode);
operands[7] = lowpart_subreg (V4QImode, operands[6], V2HImode);
+ operands[8] = force_reg (V2HImode, operands[4]);
})
;; XOP parallel XMM conditional moves
@@ -3,7 +3,6 @@
/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
/* { dg-final { scan-assembler-times "vpcmpeqw" 6 } } */
/* { dg-final { scan-assembler-times "vpcmpgtw" 2 } } */
-/* { dg-final { scan-assembler-times "vpminuw" 2 } } */
/* { dg-final { scan-assembler-times "vcmpph" 8 } } */
/* { dg-final { scan-assembler-times "vpblendvb" 8 } } */
typedef unsigned short __attribute__((__vector_size__ (4))) __v2hu;
new file mode 100644
@@ -0,0 +1,36 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse4.1" } */
+/* { dg-require-effective-target sse4 } */
+
+#include "sse4_1-check.h"
+
+typedef unsigned short __attribute__((__vector_size__ (8))) U4;
+typedef unsigned short __attribute__((__vector_size__ (4))) U2;
+
+U4
+__attribute__((noipa))
+foo4 (U4 a, U4 b)
+{
+ return a > b;
+}
+
+U2
+__attribute__((noipa))
+foo2 (U2 a, U2 b)
+{
+ return a > b;
+}
+
+static void
+sse4_1_test ()
+{
+ U4 a = __extension__(U4) {1, 1, 1, 1};
+ U4 b = foo4 (a, a);
+ if (b[0] || b[1] || b[2] || b[3]) __builtin_abort();
+
+ U2 c = __extension__(U2) {1, 1};
+ U2 d = foo2 (c, c);
+ if (d[0] || d[1]) __builtin_abort();
+
+ return;
+}