@@ -1273,6 +1273,7 @@ bool ix86_can_use_ndd_p (enum rtx_code code)
case MINUS:
case NEG:
case NOT:
+ case AND:
return true;
default:
return false;
@@ -11471,18 +11471,20 @@ (define_expand "and<mode>3"
(operands[0], gen_lowpart (mode, operands[1]),
<MODE>mode, mode, 1));
else
- ix86_expand_binary_operator (AND, <MODE>mode, operands);
+ ix86_expand_binary_operator (AND, <MODE>mode, operands,
+ ix86_can_use_ndd_p (AND));
DONE;
})
+;; Alternatives 2 and 3 (APX NDD) do not tie operand 0 to operand 1, so they
+;; earlyclobber it: the split emits the first word's AND before the second
+;; word's inputs are read, and an overlapping destination would corrupt them.
(define_insn_and_split "*and<dwi>3_doubleword"
- [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
+ [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r")
(and:<DWI>
- (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0")
- (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o")))
+ (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,r")
+ (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r<di>,o")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (AND, <DWI>mode, operands)"
+ "ix86_binary_operator_ok (AND, <DWI>mode, operands,
+ ix86_can_use_ndd_p (AND))"
"#"
"&& reload_completed"
[(const_int:DWIH 0)]
@@ -11494,39 +11496,53 @@ (define_insn_and_split "*and<dwi>3_doubleword"
if (operands[2] == const0_rtx)
emit_move_insn (operands[0], const0_rtx);
else if (operands[2] == constm1_rtx)
- emit_insn_deleted_note_p = true;
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+ else
+ emit_insn_deleted_note_p = true;
+ }
else
- ix86_expand_binary_operator (AND, <MODE>mode, &operands[0]);
+ ix86_expand_binary_operator (AND, <MODE>mode, &operands[0],
+ ix86_can_use_ndd_p (AND));
if (operands[5] == const0_rtx)
emit_move_insn (operands[3], const0_rtx);
else if (operands[5] == constm1_rtx)
{
- if (emit_insn_deleted_note_p)
+ if (!rtx_equal_p (operands[3], operands[4]))
+ emit_move_insn (operands[3], operands[4]);
+ else if (emit_insn_deleted_note_p)
emit_note (NOTE_INSN_DELETED);
}
else
- ix86_expand_binary_operator (AND, <MODE>mode, &operands[3]);
+ ix86_expand_binary_operator (AND, <MODE>mode, &operands[3],
+ ix86_can_use_ndd_p (AND));
DONE;
-})
+}
+[(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
(define_insn "*anddi_1"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r,?k")
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,r,r,r,r,?k")
(and:DI
- (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm,k")
- (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m,L,k")))
+ (match_operand:DI 1 "nonimmediate_operand" "%0,r,0,0,rm,r,qm,k")
+ (match_operand:DI 2 "x86_64_szext_general_operand" "Z,Z,re,m,re,m,L,k")))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)"
+ "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands,
+ ix86_can_use_ndd_p (AND))"
"@
and{l}\t{%k2, %k0|%k0, %k2}
+ and{l}\t{%k2, %k1, %k0|%k0, %k1, %k2}
and{q}\t{%2, %0|%0, %2}
and{q}\t{%2, %0|%0, %2}
+ and{q}\t{%2, %1, %0|%0, %1, %2}
+ and{q}\t{%2, %1, %0|%0, %1, %2}
#
#"
- [(set_attr "isa" "x64,x64,x64,x64,avx512bw_512")
- (set_attr "type" "alu,alu,alu,imovx,msklog")
- (set_attr "length_immediate" "*,*,*,0,*")
+ [(set_attr "isa" "x64,apx_ndd,x64,x64,apx_ndd,apx_ndd,x64,avx512bw_512")
+ (set_attr "type" "alu,alu,alu,alu,alu,alu,imovx,msklog")
+ (set_attr "length_immediate" "*,*,*,*,*,*,0,*")
(set (attr "prefix_rex")
(if_then_else
(and (eq_attr "type" "imovx")
@@ -11534,7 +11550,7 @@ (define_insn "*anddi_1"
(match_operand 1 "ext_QIreg_operand")))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "SI,DI,DI,SI,DI")])
+ (set_attr "mode" "SI,SI,DI,DI,DI,DI,SI,DI")])
(define_insn_and_split "*anddi_1_btr"
[(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
@@ -11589,36 +11605,46 @@ (define_split
;; See comment for addsi_1_zext why we do use nonimmediate_operand
(define_insn "*andsi_1_zext"
- [(set (match_operand:DI 0 "register_operand" "=r")
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r")
(zero_extend:DI
- (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
- (match_operand:SI 2 "x86_64_general_operand" "rBMe"))))
+ (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,rm,r")
+ (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM"))))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && ix86_binary_operator_ok (AND, SImode, operands)"
- "and{l}\t{%2, %k0|%k0, %2}"
+ "TARGET_64BIT && ix86_binary_operator_ok (AND, SImode, operands,
+ ix86_can_use_ndd_p (AND))"
+ "@
+ and{l}\t{%2, %k0|%k0, %2}
+ and{l}\t{%2, %1, %k0|%k0, %1, %2}
+ and{l}\t{%2, %1, %k0|%k0, %1, %2}"
[(set_attr "type" "alu")
+ (set_attr "isa" "*,apx_ndd,apx_ndd")
(set_attr "mode" "SI")])
(define_insn "*and<mode>_1"
- [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,Ya,?k")
- (and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,qm,k")
- (match_operand:SWI24 2 "<general_operand>" "r<i>,<m>,L,k")))
+ [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,r,r,Ya,?k")
+ (and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,rm,r,qm,k")
+ (match_operand:SWI24 2 "<general_operand>" "r<i>,<m>,r<i>,<m>,L,k")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (AND, <MODE>mode, operands)"
+ "ix86_binary_operator_ok (AND, <MODE>mode, operands,
+ ix86_can_use_ndd_p (AND))"
"@
and{<imodesuffix>}\t{%2, %0|%0, %2}
and{<imodesuffix>}\t{%2, %0|%0, %2}
+ and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
#
#"
[(set (attr "isa")
- (cond [(eq_attr "alternative" "3")
+ (cond [(eq_attr "alternative" "2,3")
+ (const_string "apx_ndd")
+ (eq_attr "alternative" "5")
(if_then_else (eq_attr "mode" "SI")
(const_string "avx512bw")
(const_string "avx512f"))
]
(const_string "*")))
- (set_attr "type" "alu,alu,imovx,msklog")
- (set_attr "length_immediate" "*,*,0,*")
+ (set_attr "type" "alu,alu,alu,alu,imovx,msklog")
+ (set_attr "length_immediate" "*,*,*,*,0,*")
(set (attr "prefix_rex")
(if_then_else
(and (eq_attr "type" "imovx")
@@ -11626,24 +11652,28 @@ (define_insn "*and<mode>_1"
(match_operand 1 "ext_QIreg_operand")))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "<MODE>,<MODE>,SI,<MODE>")])
+ (set_attr "mode" "<MODE>,<MODE>,<MODE>,<MODE>,SI,<MODE>")])
(define_insn "*andqi_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k")
- (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
- (match_operand:QI 2 "general_operand" "qn,m,rn,k")))
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
+ (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k")
+ (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (AND, QImode, operands)"
+ "ix86_binary_operator_ok (AND, QImode, operands,
+ ix86_can_use_ndd_p (AND))"
"@
and{b}\t{%2, %0|%0, %2}
and{b}\t{%2, %0|%0, %2}
and{l}\t{%k2, %k0|%k0, %k2}
+ and{b}\t{%2, %1, %0|%0, %1, %2}
+ and{b}\t{%2, %1, %0|%0, %1, %2}
#"
- [(set_attr "type" "alu,alu,alu,msklog")
+ [(set_attr "type" "alu,alu,alu,alu,alu,msklog")
+ (set_attr "isa" "*,*,*,apx_ndd,apx_ndd,*")
(set (attr "mode")
(cond [(eq_attr "alternative" "2")
(const_string "SI")
- (and (eq_attr "alternative" "3")
+ (and (eq_attr "alternative" "5")
(match_test "!TARGET_AVX512DQ"))
(const_string "HI")
]
@@ -11683,7 +11713,10 @@ (define_split
(clobber (reg:CC FLAGS_REG))]
"reload_completed
&& (!REG_P (operands[1])
- || REGNO (operands[0]) != REGNO (operands[1]))"
+ || REGNO (operands[0]) != REGNO (operands[1]))
+ && (UINTVAL (operands[2]) == GET_MODE_MASK (SImode)
+ || UINTVAL (operands[2]) == GET_MODE_MASK (HImode)
+ || UINTVAL (operands[2]) == GET_MODE_MASK (QImode))"
[(const_int 0)]
{
unsigned HOST_WIDE_INT ival = UINTVAL (operands[2]);
@@ -11756,10 +11789,10 @@ (define_insn "*anddi_2"
[(set (reg FLAGS_REG)
(compare
(and:DI
- (match_operand:DI 1 "nonimmediate_operand" "%0,0,0")
- (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m"))
+ (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,r,rm,r")
+ (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m,Z,re,m"))
(const_int 0)))
- (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r")
+ (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r,r,r")
(and:DI (match_dup 1) (match_dup 2)))]
"TARGET_64BIT
&& ix86_match_ccmode
@@ -11773,38 +11806,49 @@ (define_insn "*anddi_2"
&& (!CONST_INT_P (operands[2])
|| val_signbit_known_set_p (SImode, INTVAL (operands[2]))))
? CCZmode : CCNOmode)
- && ix86_binary_operator_ok (AND, DImode, operands)"
+ && ix86_binary_operator_ok (AND, DImode, operands,
+ ix86_can_use_ndd_p (AND))"
"@
and{l}\t{%k2, %k0|%k0, %k2}
and{q}\t{%2, %0|%0, %2}
- and{q}\t{%2, %0|%0, %2}"
+ and{q}\t{%2, %0|%0, %2}
+ and{l}\t{%k2, %k1, %k0|%k0, %k1, %k2}
+ and{q}\t{%2, %1, %0|%0, %1, %2}
+ and{q}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "alu")
- (set_attr "mode" "SI,DI,DI")])
+ (set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd")
+ (set_attr "mode" "SI,DI,DI,SI,DI,DI")])
;; See comment for addsi_1_zext why we do use nonimmediate_operand
(define_insn "*andsi_2_zext"
[(set (reg FLAGS_REG)
(compare (and:SI
- (match_operand:SI 1 "nonimmediate_operand" "%0")
- (match_operand:SI 2 "x86_64_general_operand" "rBMe"))
+ (match_operand:SI 1 "nonimmediate_operand" "%0,rm,r")
+ (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM"))
(const_int 0)))
- (set (match_operand:DI 0 "register_operand" "=r")
+ (set (match_operand:DI 0 "register_operand" "=r,r,r")
(zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))]
"TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
- && ix86_binary_operator_ok (AND, SImode, operands)"
- "and{l}\t{%2, %k0|%k0, %2}"
+ && ix86_binary_operator_ok (AND, SImode, operands,
+ ix86_can_use_ndd_p (AND))"
+ "@
+ and{l}\t{%2, %k0|%k0, %2}
+ and{l}\t{%2, %1, %k0|%k0, %1, %2}
+ and{l}\t{%2, %1, %k0|%k0, %1, %2}"
[(set_attr "type" "alu")
+ (set_attr "isa" "*,apx_ndd,apx_ndd")
(set_attr "mode" "SI")])
(define_insn "*andqi_2_maybe_si"
[(set (reg FLAGS_REG)
(compare (and:QI
- (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
- (match_operand:QI 2 "general_operand" "qn,m,n"))
+ (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r")
+ (match_operand:QI 2 "general_operand" "qn,m,n,rn,m"))
(const_int 0)))
- (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
+ (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r")
(and:QI (match_dup 1) (match_dup 2)))]
- "ix86_binary_operator_ok (AND, QImode, operands)
+ "ix86_binary_operator_ok (AND, QImode, operands,
+ ix86_can_use_ndd_p (AND))
&& ix86_match_ccmode (insn,
CONST_INT_P (operands[2])
&& INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode)"
@@ -11815,9 +11859,12 @@ (define_insn "*andqi_2_maybe_si"
operands[2] = GEN_INT (INTVAL (operands[2]) & 0xff);
return "and{l}\t{%2, %k0|%k0, %2}";
}
+ if (which_alternative > 2)
+ return "and{b}\t{%2, %1, %0|%0, %1, %2}";
return "and{b}\t{%2, %0|%0, %2}";
}
[(set_attr "type" "alu")
+ (set_attr "isa" "*,*,*,apx_ndd,apx_ndd")
(set (attr "mode")
(cond [(eq_attr "alternative" "2")
(const_string "SI")
@@ -11836,15 +11883,21 @@ (define_insn "*andqi_2_maybe_si"
(define_insn "*and<mode>_2"
[(set (reg FLAGS_REG)
(compare (and:SWI124
- (match_operand:SWI124 1 "nonimmediate_operand" "%0,0")
- (match_operand:SWI124 2 "<general_operand>" "<r><i>,<m>"))
+ (match_operand:SWI124 1 "nonimmediate_operand" "%0,0,rm,r")
+ (match_operand:SWI124 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))
(const_int 0)))
- (set (match_operand:SWI124 0 "nonimmediate_operand" "=<r>m,<r>")
+ (set (match_operand:SWI124 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
(and:SWI124 (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCNOmode)
- && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
- "and{<imodesuffix>}\t{%2, %0|%0, %2}"
+ && ix86_binary_operator_ok (AND, <MODE>mode, operands,
+ ix86_can_use_ndd_p (AND))"
+ "@
+ and{<imodesuffix>}\t{%2, %0|%0, %2}
+ and{<imodesuffix>}\t{%2, %0|%0, %2}
+ and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "alu")
+ (set_attr "isa" "*,*,apx_ndd,apx_ndd")
(set_attr "mode" "<MODE>")])
(define_insn "*<code>qi_ext<mode>_0"
@@ -12057,6 +12110,7 @@ (define_insn_and_split "*<code>qi_ext<mode>_3"
;; Don't do the splitting with memory operands, since it introduces risk
;; of memory mismatch stalls. We may want to do the splitting for optimizing
;; for size, but that can (should?) be handled by generic code instead.
+;; With APX NDD, split only when operands 0 and 1 match; NDD does not support *h registers.
(define_split
[(set (match_operand:SWI248 0 "QIreg_operand")
(and:SWI248 (match_operand:SWI248 1 "register_operand")
@@ -12064,7 +12118,8 @@ (define_split
(clobber (reg:CC FLAGS_REG))]
"reload_completed
&& (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
- && !(~INTVAL (operands[2]) & ~(255 << 8))"
+ && !(~INTVAL (operands[2]) & ~(255 << 8))
+ && !(TARGET_APX_NDD && REGNO (operands[0]) != REGNO (operands[1]))"
[(parallel
[(set (zero_extract:HI (match_dup 0)
(const_int 8)
@@ -12093,7 +12148,9 @@ (define_split
"reload_completed
&& (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
&& !(~INTVAL (operands[2]) & ~255)
- && !(INTVAL (operands[2]) & 128)"
+ && !(INTVAL (operands[2]) & 128)
+ && !(TARGET_APX_NDD
+ && !rtx_equal_p (operands[0], operands[1]))"
[(parallel [(set (strict_low_part (match_dup 0))
(and:QI (match_dup 1)
(match_dup 2)))
@@ -85,6 +85,15 @@ F (int, not, ~)
F1 (int, not, ~)
F (long, not, ~)
F1 (long, not, ~)
+
+FOO (char, and, &)
+FOO1 (char, and, &)
+FOO (short, and, &)
+FOO1 (short, and, &)
+FOO (int, and, &)
+FOO1 (int, and, &)
+FOO (long, and, &)
+FOO1 (long, and, &)
/* { dg-final { scan-assembler-times "add(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "add(?:l|w|q)\[^\n\r]%(?:|r|e)si, \\(%rdi\\), %(?:|r|e)ax" 4 } } */
@@ -95,3 +104,7 @@ F1 (long, not, ~)
/* { dg-final { scan-assembler-times "neg(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "not(?:l|w|q)\[^\n\r]\\(%rdi\\), %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "not(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)ax" 4 } } */
+/* { dg-final { scan-assembler-times "andb\[^\n\r]*1, \\(%rdi\\), %al" 1 } } */
+/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 3 } } */
+/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)si, %(?:|r|e)ax" 2 } } */
+/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 2 } } */
@@ -3,8 +3,8 @@
#include "spill_to_mask-1.c"
-/* { dg-final { scan-assembler "movl\[ \t]+\[^\\n\\r\]*, %r16d" } } */
-/* { dg-final { scan-assembler "movl\[ \t]+\[^\\n\\r\]*, %r17d" } } */
+/* { dg-final { scan-assembler "(?:movl|rorx)\[ \t]+\[^\\n\\r\]*, %r16d" } } */
+/* { dg-final { scan-assembler "(?:movl|rorx)\[ \t]+\[^\\n\\r\]*, %r17d" } } */
/* { dg-final { scan-assembler "movl\[ \t]+\[^\\n\\r\]*, %r18d" } } */
/* { dg-final { scan-assembler "movq\[ \t]+\[^\\n\\r\]*, %r19" } } */
/* { dg-final { scan-assembler "movl\[ \t]+\[^\\n\\r\]*, %r20d" } } */
@@ -13,8 +13,8 @@
/* { dg-final { scan-assembler "movl\[ \t]+\[^\\n\\r\]*, %r23d" } } */
/* { dg-final { scan-assembler "movl\[ \t]+\[^\\n\\r\]*, %r24d" } } */
/* { dg-final { scan-assembler "addl\[ \t]+\[^\\n\\r\]*, %r25d" } } */
-/* { dg-final { scan-assembler "movl\[ \t]+\[^\\n\\r\]*, %r26d" } } */
-/* { dg-final { scan-assembler "movl\[ \t]+\[^\\n\\r\]*, %r27d" } } */
+/* { dg-final { scan-assembler "(?:movl|movbel)\[ \t]+\[^\\n\\r\]*, %r26d" } } */
+/* { dg-final { scan-assembler "(?:movl|movbel)\[ \t]+\[^\\n\\r\]*, %r27d" } } */
/* { dg-final { scan-assembler "movbel\[ \t]+\[^\\n\\r\]*, %r28d" } } */
/* { dg-final { scan-assembler "movbel\[ \t]+\[^\\n\\r\]*, %r29d" } } */
/* { dg-final { scan-assembler "movbel\[ \t]+\[^\\n\\r\]*, %r30d" } } */