xtensa: Optimize boolean evaluation when SImode EQ/NE to zero if TARGET_MINMAX
Checks
Commit Message
This patch optimizes the boolean evaluation of SImode EQ/NE comparisons
against 0 by using the MINU (Minimum Value Unsigned) machine instruction,
which is available when TARGET_MINMAX is configured: for example,
(x != 0) becomes MINU(x, 1) and (x == 0) becomes (MINU(x, 1) ^ 1).
/* example */
int test0(int x) {
return x == 0;
}
int test1(int x) {
return x != 0;
}
;; before
test0:
mov.n a10, a2
movi.n a9, 1
movi.n a2, 0
moveqz a2, a9, a10
ret.n
test1:
mov.n a10, a2
movi.n a9, 1
movi.n a2, 0
movnez a2, a9, a10
ret.n
;; after (prereq. TARGET_MINMAX)
test0:
movi.n a9, 1
minu a2, a2, a9
xor a2, a2, a9
ret.n
test1:
movi.n a9, 1
minu a2, a2, a9
ret.n
gcc/ChangeLog:
* config/xtensa/xtensa.cc (xtensa_expand_scc):
Add code for particular constants (only 0 and INT_MIN for now)
for EQ/NE boolean evaluation in SImode.
* config/xtensa/xtensa.md (*eqne_INT_MIN): Remove because its
implementation has been integrated into the above.
---
gcc/config/xtensa/xtensa.cc | 43 +++++++++++++++++++++++++++++++------
gcc/config/xtensa/xtensa.md | 34 -----------------------------
2 files changed, 37 insertions(+), 40 deletions(-)
Comments
Hi Suwa-san,
On Tue, Sep 5, 2023 at 2:29 AM Takayuki 'January June' Suwa
<jjsuwa_sys3175@yahoo.co.jp> wrote:
>
> This patch optimizes the boolean evaluation for equality to 0 in SImode
> using the MINU (Minimum Value Unsigned) machine instruction available
> when TARGET_MINMAX is configured, for example, (x != 0) to MINU(x, 1)
> and (x == 0) to (MINU(x, 1) ^ 1).
>
> /* example */
> int test0(int x) {
> return x == 0;
> }
> int test1(int x) {
> return x != 0;
> }
>
> ;; before
> test0:
> mov.n a10, a2
> movi.n a9, 1
> movi.n a2, 0
> moveqz a2, a9, a10
> ret.n
> test1:
> mov.n a10, a2
> movi.n a9, 1
> movi.n a2, 0
> movnez a2, a9, a10
> ret.n
>
> ;; after (prereq. TARGET_MINMAX)
> test0:
> movi.n a9, 1
> minu a2, a2, a9
> xor a2, a2, a9
> ret.n
ISTM that test0 could be done with movnez in the same three instructions:
movi a9, 1
movnez a2, a9, a2
xor a2, a2, a9
> test1:
> movi.n a9, 1
> minu a2, a2, a9
> ret.n
ISTM that test1 could be done with movnez in the same two instructions:
movi a9, 1
movnez a2, a9, a2
On 2023/09/06 8:01, Max Filippov wrote:
> Hi Suwa-san,
Hi!
>
> On Tue, Sep 5, 2023 at 2:29 AM Takayuki 'January June' Suwa
> <jjsuwa_sys3175@yahoo.co.jp> wrote:
>>
>> This patch optimizes the boolean evaluation for equality to 0 in SImode
>> using the MINU (Minimum Value Unsigned) machine instruction available
>> when TARGET_MINMAX is configured, for example, (x != 0) to MINU(x, 1)
>> and (x == 0) to (MINU(x, 1) ^ 1).
>>
>> /* example */
>> int test0(int x) {
>> return x == 0;
>> }
>> int test1(int x) {
>> return x != 0;
>> }
>>
>> ;; before
>> test0:
>> mov.n a10, a2
>> movi.n a9, 1
>> movi.n a2, 0
>> moveqz a2, a9, a10
>> ret.n
>> test1:
>> mov.n a10, a2
>> movi.n a9, 1
>> movi.n a2, 0
>> movnez a2, a9, a10
>> ret.n
>>
>> ;; after (prereq. TARGET_MINMAX)
>> test0:
>> movi.n a9, 1
>> minu a2, a2, a9
>> xor a2, a2, a9
>> ret.n
>
> ISTM that test0 could be done with movnez in the same three instructions:
>
> movi a9, 1
> movnez a2, a9, a2
> xor a2, a2, a9
Unfortunately, the MOV[EQ/NE]Z machine instructions can only be used to implement this functionality if the input and output physical registers are the same (a2 in the example), because they leave the destination unchanged when the condition does not hold.
In fact, when the code is modified to use MOV[EQ/NE]Z, the GCC register allocator often prepends a register move instruction to satisfy the above constraint (and thus often does not reduce the instruction count).
I'm currently looking into whether I can somehow follow up on this after the physical registers have been determined (around split2 or peephole2).
>
>> test1:
>> movi.n a9, 1
>> minu a2, a2, a9
>> ret.n
>
> ISTM that test1 could be done with movnez in the same two instructions:
>
> movi a9, 1
> movnez a2, a9, a2
>
On Tue, Sep 5, 2023 at 9:24 PM Takayuki 'January June' Suwa
<jjsuwa_sys3175@yahoo.co.jp> wrote:
> On 2023/09/06 8:01, Max Filippov wrote:
> > On Tue, Sep 5, 2023 at 2:29 AM Takayuki 'January June' Suwa
> > <jjsuwa_sys3175@yahoo.co.jp> wrote:
> >> ;; after (prereq. TARGET_MINMAX)
> >> test0:
> >> movi.n a9, 1
> >> minu a2, a2, a9
> >> xor a2, a2, a9
> >> ret.n
> >
> > ISTM that test0 could be done with movnez in the same three instructions:
> >
> > movi a9, 1
> > movnez a2, a9, a2
> > xor a2, a2, a9
>
> Unfortunately, the MOV[EQ/NE]Z machine instruction can only be used
> to implement the functionality if the input and output physical registers
> are the same (a2 in the example).
Oh yeah, you're right, I missed that.
On Tue, Sep 5, 2023 at 2:29 AM Takayuki 'January June' Suwa
<jjsuwa_sys3175@yahoo.co.jp> wrote:
>
> This patch optimizes the boolean evaluation for equality to 0 in SImode
> using the MINU (Minimum Value Unsigned) machine instruction available
> when TARGET_MINMAX is configured, for example, (x != 0) to MINU(x, 1)
> and (x == 0) to (MINU(x, 1) ^ 1).
>
> /* example */
> int test0(int x) {
> return x == 0;
> }
> int test1(int x) {
> return x != 0;
> }
>
> ;; before
> test0:
> mov.n a10, a2
> movi.n a9, 1
> movi.n a2, 0
> moveqz a2, a9, a10
> ret.n
> test1:
> mov.n a10, a2
> movi.n a9, 1
> movi.n a2, 0
> movnez a2, a9, a10
> ret.n
>
> ;; after (prereq. TARGET_MINMAX)
> test0:
> movi.n a9, 1
> minu a2, a2, a9
> xor a2, a2, a9
> ret.n
> test1:
> movi.n a9, 1
> minu a2, a2, a9
> ret.n
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.cc (xtensa_expand_scc):
> Add code for particular constants (only 0 and INT_MIN for now)
> for EQ/NE boolean evaluation in SImode.
> * config/xtensa/xtensa.md (*eqne_INT_MIN): Remove because its
> implementation has been integrated into the above.
> ---
> gcc/config/xtensa/xtensa.cc | 43 +++++++++++++++++++++++++++++++------
> gcc/config/xtensa/xtensa.md | 34 -----------------------------
> 2 files changed, 37 insertions(+), 40 deletions(-)
Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.
@@ -994,15 +994,46 @@ xtensa_expand_scc (rtx operands[4], machine_mode cmp_mode)
rtx cmp;
rtx one_tmp, zero_tmp;
rtx (*gen_fn) (rtx, rtx, rtx, rtx, rtx);
+ enum rtx_code code = GET_CODE (operands[1]);
- if (!(cmp = gen_conditional_move (GET_CODE (operands[1]), cmp_mode,
- operands[2], operands[3])))
+ if (cmp_mode == SImode && CONST_INT_P (operands[3])
+ && (code == EQ || code == NE))
+ switch (INTVAL (operands[3]))
+ {
+ case 0:
+ if (TARGET_MINMAX)
+ {
+ one_tmp = force_reg (SImode, const1_rtx);
+ emit_insn (gen_uminsi3 (dest, operands[2], one_tmp));
+ if (code == EQ)
+ emit_insn (gen_xorsi3 (dest, dest, one_tmp));
+ return 1;
+ }
+ break;
+ case -2147483648:
+ if (TARGET_ABS)
+ {
+ emit_insn (gen_abssi2 (dest, operands[2]));
+ if (code == EQ)
+ emit_insn (gen_lshrsi3 (dest, dest, GEN_INT (31)));
+ else
+ {
+ emit_insn (gen_ashrsi3 (dest, dest, GEN_INT (31)));
+ emit_insn (gen_addsi3 (dest, dest, const1_rtx));
+ }
+ return 1;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (! (cmp = gen_conditional_move (code, cmp_mode,
+ operands[2], operands[3])))
return 0;
- one_tmp = gen_reg_rtx (SImode);
- zero_tmp = gen_reg_rtx (SImode);
- emit_insn (gen_movsi (one_tmp, const_true_rtx));
- emit_insn (gen_movsi (zero_tmp, const0_rtx));
+ one_tmp = force_reg (SImode, const1_rtx);
+ zero_tmp = force_reg (SImode, const0_rtx);
gen_fn = (cmp_mode == SImode
? gen_movsicc_internal0
@@ -3188,40 +3188,6 @@
(const_int 5)
(const_int 6)))])
-
-(define_insn_and_split "*eqne_INT_MIN"
- [(set (match_operand:SI 0 "register_operand" "=a")
- (match_operator:SI 2 "boolean_operator"
- [(match_operand:SI 1 "register_operand" "r")
- (const_int -2147483648)]))]
- "TARGET_ABS"
- "#"
- "&& 1"
- [(set (match_dup 0)
- (abs:SI (match_dup 1)))
- (set (match_dup 0)
- (match_op_dup:SI 2
- [(match_dup 0)
- (const_int 31)]))
- (match_dup 3)]
-{
- enum rtx_code code = GET_CODE (operands[2]);
- operands[2] = gen_rtx_fmt_ee ((code == EQ) ? LSHIFTRT : ASHIFTRT,
- SImode, XEXP (operands[2], 0),
- XEXP (operands[2], 1));
- operands[3] = (code != EQ) ? gen_addsi3 (operands[0],
- operands[0], const1_rtx)
- : const0_rtx;
-}
- [(set_attr "type" "move")
- (set_attr "mode" "SI")
- (set (attr "length")
- (if_then_else (match_test "GET_CODE (operands[2]) == EQ")
- (const_int 3)
- (if_then_else (match_test "TARGET_DENSITY")
- (const_int 5)
- (const_int 6))))])
-
(define_peephole2
[(set (match_operand:SI 0 "register_operand")
(match_operand:SI 6 "reload_operand"))