Improve stack protector patterns and peephole2s to substitute stack
protector scratch register clear with unrelated subsequent register
initialization in several ways:
a. Explicitly generate scratch register as named pseudo. This allows
optimizers to eventually reuse the zero value in the register.
b. Allow scratch register in different mode (SWI48) than PTR mode:
d000: 65 48 8b 04 25 28 00 mov %gs:0x28,%rax
d007: 00 00
d009: 48 89 44 24 08 mov %rax,0x8(%rsp)
d00e: 8b 87 e0 01 00 00 mov 0x1e0(%rdi),%eax
SImode moves on x86 zero-extend to the whole DImode register,
so stack protector paranoia is not compromised.
c. Relax peephole2 constraint that stack protector scratch register
must match new initialized register. This relaxation substantially
improves peephole2 opportunities, and generates sequences like:
a310: 65 4c 8b 34 25 28 00 mov %gs:0x28,%r14
a317: 00 00
a319: 4c 89 74 24 08 mov %r14,0x8(%rsp)
a31e: 4c 8b b7 98 00 00 00 mov 0x98(%rdi),%r14
We have to ensure the new scratch is dead in front of the sequence.
The patch also fixes omission of earlyclobbers for all alternatives of
new initialized register in *stack_protect_set_3, avoiding the need for
reg_overlap_mentioned_p constraint. Earlyclobbers are per alternative,
not per operand.
Also, instructions are already valid in peephole2 pass, so we don't
have to explicitly re-check their operands for validity.
gcc/ChangeLog:
* config/i386/i386.md (stack_protect_set): Explicitly
generate scratch register in word mode.
(@stack_protect_set_1_<mode>): Rename to ...
(@stack_protect_set_1_<PTR:mode>_<SWI48:mode>): ... this.
Use SWI48 mode iterator to match scratch register.
(stack_protexct_set_1 peephole2): Use PTR, W and SWI48 mode
iterators to match peephole sequence. Use general_operand
predicate for operand 4. Allow different operand 2 and operand 3
registers and use peep2_reg_dead_p to ensure new scratch
register is dead before peephole seqeunce. Use peep2_reg_dead_p
to ensure old scratch register is dead after peephole sequence.
(*stack_protect_set_2_<mode>): Rename to ...
(*stack_protect_set_2_<mode>_si): .. this.
(*stack_protect_set_3): Rename to ...
(*stack_protect_set_2_<mode>_di): ... this.
Use PTR mode iterator to match stack protector memory move.
Use earlyclobber for all alternatives of operand 1.
(stack_protexct_set_2 peephole2): Use PTR, W and SWI48 mode
iterators to match peephole sequence. Use general_operand
predicate for operand 4. Allow different operand 2 and operand 3
registers and use peep2_reg_dead_p to ensure new scratch
register is dead before peephole seqeunce. Use peep2_reg_dead_p
to ensure old scratch register is dead after peephole sequence.
Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
Uros.
@@ -25653,40 +25653,58 @@ (define_expand "stack_protect_set"
(match_operand 1 "memory_operand")]
""
{
+ rtx scratch = gen_reg_rtx (word_mode);
+
emit_insn (gen_stack_protect_set_1
- (ptr_mode, operands[0], operands[1]));
+ (ptr_mode, word_mode, operands[0], operands[1], scratch));
DONE;
})
-(define_insn "@stack_protect_set_1_<mode>"
+(define_insn "@stack_protect_set_1_<PTR:mode>_<SWI48:mode>"
[(set (match_operand:PTR 0 "memory_operand" "=m")
(unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")]
UNSPEC_SP_SET))
- (set (match_scratch:PTR 2 "=&r") (const_int 0))
+ (set (match_operand:SWI48 2 "register_operand" "=&r") (const_int 0))
(clobber (reg:CC FLAGS_REG))]
""
{
- output_asm_insn ("mov{<imodesuffix>}\t{%1, %2|%2, %1}", operands);
- output_asm_insn ("mov{<imodesuffix>}\t{%2, %0|%0, %2}", operands);
+ output_asm_insn ("mov{<PTR:imodesuffix>}\t{%1, %<PTR:k>2|%<PTR:k>2, %1}",
+ operands);
+ output_asm_insn ("mov{<PTR:imodesuffix>}\t{%<PTR:k>2, %0|%0, %<PTR:k>2}",
+ operands);
return "xor{l}\t%k2, %k2";
}
[(set_attr "type" "multi")])
;; Patterns and peephole2s to optimize stack_protect_set_1_<mode>
-;; immediately followed by *mov{s,d}i_internal to the same register,
-;; where we can avoid the xor{l} above. We don't split this, so that
-;; scheduling or anything else doesn't separate the *stack_protect_set*
-;; pattern from the set of the register that overwrites the register
-;; with a new value.
-(define_insn "*stack_protect_set_2_<mode>"
+;; immediately followed by *mov{s,d}i_internal, where we can avoid
+;; the xor{l} above. We don't split this, so that scheduling or
+;; anything else doesn't separate the *stack_protect_set* pattern from
+;; the set of the register that overwrites the register with a new value.
+
+(define_peephole2
+ [(parallel [(set (match_operand:PTR 0 "memory_operand")
+ (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
+ UNSPEC_SP_SET))
+ (set (match_operand:W 2 "general_reg_operand") (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_operand:SWI48 3 "general_reg_operand")
+ (match_operand:SWI48 4 "const0_operand"))
+ (clobber (reg:CC FLAGS_REG))])]
+ "peep2_reg_dead_p (0, operands[3])
+ && peep2_reg_dead_p (1, operands[2])"
+ [(parallel [(set (match_dup 0)
+ (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
+ (set (match_dup 3) (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])])
+
+(define_insn "*stack_protect_set_2_<mode>_si"
[(set (match_operand:PTR 0 "memory_operand" "=m")
(unspec:PTR [(match_operand:PTR 3 "memory_operand" "m")]
UNSPEC_SP_SET))
(set (match_operand:SI 1 "register_operand" "=&r")
- (match_operand:SI 2 "general_operand" "g"))
- (clobber (reg:CC FLAGS_REG))]
- "reload_completed
- && !reg_overlap_mentioned_p (operands[1], operands[2])"
+ (match_operand:SI 2 "general_operand" "g"))]
+ "reload_completed"
{
output_asm_insn ("mov{<imodesuffix>}\t{%3, %<k>1|%<k>1, %3}", operands);
output_asm_insn ("mov{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}", operands);
@@ -25699,38 +25717,16 @@ (define_insn "*stack_protect_set_2_<mode>"
[(set_attr "type" "multi")
(set_attr "length" "24")])
-(define_peephole2
- [(parallel [(set (match_operand:PTR 0 "memory_operand")
- (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
- UNSPEC_SP_SET))
- (set (match_operand:PTR 2 "general_reg_operand") (const_int 0))
- (clobber (reg:CC FLAGS_REG))])
- (set (match_operand:SI 3 "general_reg_operand")
- (match_operand:SI 4))]
- "REGNO (operands[2]) == REGNO (operands[3])
- && general_operand (operands[4], SImode)
- && (general_reg_operand (operands[4], SImode)
- || memory_operand (operands[4], SImode)
- || immediate_operand (operands[4], SImode))
- && !reg_overlap_mentioned_p (operands[3], operands[4])"
- [(parallel [(set (match_dup 0)
- (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
- (set (match_dup 3) (match_dup 4))
- (clobber (reg:CC FLAGS_REG))])])
-
-(define_insn "*stack_protect_set_3"
- [(set (match_operand:DI 0 "memory_operand" "=m,m,m")
- (unspec:DI [(match_operand:DI 3 "memory_operand" "m,m,m")]
- UNSPEC_SP_SET))
- (set (match_operand:DI 1 "register_operand" "=&r,r,r")
- (match_operand:DI 2 "general_operand" "Z,rem,i"))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT
- && reload_completed
- && !reg_overlap_mentioned_p (operands[1], operands[2])"
+(define_insn "*stack_protect_set_2_<mode>_di"
+ [(set (match_operand:PTR 0 "memory_operand" "=m,m,m")
+ (unspec:PTR [(match_operand:PTR 3 "memory_operand" "m,m,m")]
+ UNSPEC_SP_SET))
+ (set (match_operand:DI 1 "register_operand" "=&r,&r,&r")
+ (match_operand:DI 2 "general_operand" "Z,rem,i"))]
+ "TARGET_64BIT && reload_completed"
{
- output_asm_insn ("mov{q}\t{%3, %1|%1, %3}", operands);
- output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", operands);
+ output_asm_insn ("mov{<imodesuffix>}\t{%3, %<k>1|%<k>1, %3}", operands);
+ output_asm_insn ("mov{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}", operands);
if (pic_32bit_operand (operands[2], DImode))
return "lea{q}\t{%E2, %1|%1, %E2}";
else if (which_alternative == 0)
@@ -25746,25 +25742,18 @@ (define_insn "*stack_protect_set_3"
(set_attr "length" "24")])
(define_peephole2
- [(parallel [(set (match_operand:DI 0 "memory_operand")
- (unspec:DI [(match_operand:DI 1 "memory_operand")]
- UNSPEC_SP_SET))
- (set (match_operand:DI 2 "general_reg_operand") (const_int 0))
- (clobber (reg:CC FLAGS_REG))])
- (set (match_dup 2) (match_operand:DI 3))]
- "TARGET_64BIT
- && general_operand (operands[3], DImode)
- && (general_reg_operand (operands[3], DImode)
- || memory_operand (operands[3], DImode)
- || x86_64_zext_immediate_operand (operands[3], DImode)
- || x86_64_immediate_operand (operands[3], DImode)
- || (CONSTANT_P (operands[3])
- && (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[3]))))
- && !reg_overlap_mentioned_p (operands[2], operands[3])"
- [(parallel [(set (match_dup 0)
- (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
- (set (match_dup 2) (match_dup 3))
- (clobber (reg:CC FLAGS_REG))])])
+ [(parallel [(set (match_operand:PTR 0 "memory_operand")
+ (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
+ UNSPEC_SP_SET))
+ (set (match_operand:W 2 "general_reg_operand") (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_operand:SWI48 3 "general_reg_operand")
+ (match_operand:SWI48 4 "general_operand"))]
+ "peep2_reg_dead_p (0, operands[3])
+ && peep2_reg_dead_p (1, operands[2])"
+ [(parallel [(set (match_dup 0)
+ (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
+ (set (match_dup 3) (match_dup 4))])])
(define_expand "stack_protect_test"
[(match_operand 0 "memory_operand")