[v1] LoongArch: Remove redundant sign-extension instructions caused by the SLT instruction.
Checks
Commit Message
Since the slt instruction does not distinguish between 32-bit and 64-bit operations
under the LoongArch 64-bit architecture, if the operands of slt are of SImode, they
must be sign-extended before the operation.
However, in cases like the following test case, the sign extension can be omitted:
extern int src1, src2, src3;

/* Return the larger of (src1 + src2) and (src1 + src3).  */
int
test (void)
{
  int data1 = src1 + src2;
  int data2 = src1 + src3;
  return data1 > data2 ? data1 : data2;
}
Assembly code before optimization:
...
add.w $r4,$r4,$r14
add.w $r13,$r13,$r14
slli.w $r12,$r4,0
slli.w $r14,$r13,0
slt $r12,$r12,$r14
masknez $r4,$r4,$r12
maskeqz $r12,$r13,$r12
or $r4,$r4,$r12
slli.w $r4,$r4,0
...
After optimization:
...
add.w $r12,$r12,$r14
add.w $r13,$r13,$r14
slt $r4,$r12,$r13
masknez $r12,$r12,$r4
maskeqz $r4,$r13,$r4
or $r4,$r12,$r4
...
In this test case, the two operands of SLT are both produced by addition,
and add.w implicitly sign-extends its result, so the two operands of SLT
do not require an additional sign extension.
gcc/ChangeLog:
* config/loongarch/loongarch.cc (loongarch_expand_conditional_move):
Optimize the function implementation.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/slt-sign-extend.c: New test.
---
gcc/config/loongarch/loongarch.cc | 53 +++++++++++++++++--
.../gcc.target/loongarch/slt-sign-extend.c | 14 +++++
2 files changed, 63 insertions(+), 4 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
Comments
On 8/25/23 12:01, Lulu Cheng wrote:
> Since the slt instruction does not distinguish between 32-bit and 64-bit operations
> under the LoongArch 64-bit architecture, if the operands of slt are of SImode, symbol
> expansion is required before operation.
Hint:“符号扩展” is "sign extension" (as noun) or "sign-extend" (as verb),
not "symbol expansion".
>
> But similar to the following test case, symbol expansion can be omitted:
>
> extern int src1, src2, src3;
>
> int
> test (void)
> {
> int data1 = src1 + src2;
> int data2 = src1 + src3;
> return test1 > test2 ? test1 : test2;
> }
> Assembly code before optimization:
> ...
> add.w $r4,$r4,$r14
> add.w $r13,$r13,$r14
> slli.w $r12,$r4,0
> slli.w $r14,$r13,0
> slt $r12,$r12,$r14
> masknez $r4,$r4,$r12
> maskeqz $r12,$r13,$r12
> or $r4,$r4,$r12
> slli.w $r4,$r4,0
> ...
>
> After optimization:
> ...
> add.w $r12,$r12,$r14
> add.w $r13,$r13,$r14
> slt $r4,$r12,$r13
> masknez $r12,$r12,$r4
> maskeqz $r4,$r13,$r4
> or $r4,$r12,$r4
> ...
>
> Similar to this test example, the two operands of SLT are obtained by the
> addition operation, and the addition operation "add.w" is an implicit
> symbolic extension function, so the two operands of SLT do not require
more naturally: "and add.w implicitly sign-extends" -- brevity is often
desired and clearer ;-)
> symbolic expansion.
>
> gcc/ChangeLog:
>
> * config/loongarch/loongarch.cc (loongarch_expand_conditional_move):
> Optimize the function implementation.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/loongarch/slt-sign-extend.c: New test.
> ---
> gcc/config/loongarch/loongarch.cc | 53 +++++++++++++++++--
> .../gcc.target/loongarch/slt-sign-extend.c | 14 +++++
> 2 files changed, 63 insertions(+), 4 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
>
> diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
> index 86d58784113..1905599b9e8 100644
> --- a/gcc/config/loongarch/loongarch.cc
> +++ b/gcc/config/loongarch/loongarch.cc
> @@ -4384,14 +4384,30 @@ loongarch_expand_conditional_move (rtx *operands)
> enum rtx_code code = GET_CODE (operands[1]);
> rtx op0 = XEXP (operands[1], 0);
> rtx op1 = XEXP (operands[1], 1);
> + rtx op0_extend = op0;
> + rtx op1_extend = op1;
> +
> + /* Record whether operands[2] and operands[3] modes are promoted to word_mode. */
> + bool promote_p = false;
> + machine_mode mode = GET_MODE (operands[0]);
>
> if (FLOAT_MODE_P (GET_MODE (op1)))
> loongarch_emit_float_compare (&code, &op0, &op1);
> else
> {
> + if ((REGNO (op0) == REGNO (operands[2])
> + || (REGNO (op1) == REGNO (operands[3]) && (op1 != const0_rtx)))
> + && (GET_MODE_SIZE (GET_MODE (op0)) < word_mode))
> + {
> + mode = word_mode;
> + promote_p = true;
> + }
> +
> loongarch_extend_comparands (code, &op0, &op1);
>
> op0 = force_reg (word_mode, op0);
> + op0_extend = op0;
> + op1_extend = force_reg (word_mode, op1);
>
> if (code == EQ || code == NE)
> {
> @@ -4418,23 +4434,52 @@ loongarch_expand_conditional_move (rtx *operands)
> && register_operand (operands[2], VOIDmode)
> && register_operand (operands[3], VOIDmode))
> {
> - machine_mode mode = GET_MODE (operands[0]);
> + rtx op2 = operands[2];
> + rtx op3 = operands[3];
> +
> + if (promote_p)
> + {
> + if (REGNO (XEXP (operands[1], 0)) == REGNO (operands[2]))
> + op2 = op0_extend;
> + else
> + {
> + loongarch_extend_comparands (code, &op2, &const0_rtx);
> + op2 = force_reg (mode, op2);
> + }
> +
> + if (REGNO (XEXP (operands[1], 1)) == REGNO (operands[3]))
> + op3 = op1_extend;
> + else
> + {
> + loongarch_extend_comparands (code, &op3, &const0_rtx);
> + op3 = force_reg (mode, op3);
> + }
> + }
> +
> rtx temp = gen_reg_rtx (mode);
> rtx temp2 = gen_reg_rtx (mode);
>
> emit_insn (gen_rtx_SET (temp,
> gen_rtx_IF_THEN_ELSE (mode, cond,
> - operands[2], const0_rtx)));
> + op2, const0_rtx)));
>
> /* Flip the test for the second operand. */
> cond = gen_rtx_fmt_ee ((code == EQ) ? NE : EQ, GET_MODE (op0), op0, op1);
>
> emit_insn (gen_rtx_SET (temp2,
> gen_rtx_IF_THEN_ELSE (mode, cond,
> - operands[3], const0_rtx)));
> + op3, const0_rtx)));
>
> /* Merge the two results, at least one is guaranteed to be zero. */
> - emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2)));
> + if (promote_p)
> + {
> + rtx temp3 = gen_reg_rtx (mode);
> + emit_insn (gen_rtx_SET (temp3, gen_rtx_IOR (mode, temp, temp2)));
> + temp3 = gen_lowpart (GET_MODE (operands[0]), temp3);
> + loongarch_emit_move (operands[0], temp3);
> + }
> + else
> + emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2)));
> }
> else
> emit_insn (gen_rtx_SET (operands[0],
> diff --git a/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c b/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
> new file mode 100644
> index 00000000000..3863db79aaf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mabi=lp64d -O2" } */
> +/* { dg-final { scan-assembler-not "slli.w" } } */
> +
> +extern int src1, src2, src3;
> +
> +int
> +test (void)
> +{
> + int data1 = src1 + src2;
> + int data2 = src1 + src3;
> +
> + return test1 > test2 ? test1 : test2;
> +}
Otherwise I think this is okay, and nice catch! ;-)
在 2023/8/25 下午12:16, WANG Xuerui 写道:
> On 8/25/23 12:01, Lulu Cheng wrote:
>> Since the slt instruction does not distinguish between 32-bit and
>> 64-bit operations
>> under the LoongArch 64-bit architecture, if the operands of slt are
>> of SImode, symbol
>> expansion is required before operation.
> Hint:“符号扩展” is "sign extension" (as noun) or "sign-extend" (as verb),
> not "symbol expansion".
>>
>> But similar to the following test case, symbol expansion can be omitted:
>>
>> extern int src1, src2, src3;
>>
>> int
>> test (void)
>> {
>> int data1 = src1 + src2;
>> int data2 = src1 + src3;
>> return test1 > test2 ? test1 : test2;
>> }
>> Assembly code before optimization:
>> ...
>> add.w $r4,$r4,$r14
>> add.w $r13,$r13,$r14
>> slli.w $r12,$r4,0
>> slli.w $r14,$r13,0
>> slt $r12,$r12,$r14
>> masknez $r4,$r4,$r12
>> maskeqz $r12,$r13,$r12
>> or $r4,$r4,$r12
>> slli.w $r4,$r4,0
>> ...
>>
>> After optimization:
>> ...
>> add.w $r12,$r12,$r14
>> add.w $r13,$r13,$r14
>> slt $r4,$r12,$r13
>> masknez $r12,$r12,$r4
>> maskeqz $r4,$r13,$r4
>> or $r4,$r12,$r4
>> ...
>>
>> Similar to this test example, the two operands of SLT are obtained by
>> the
>> addition operation, and the addition operation "add.w" is an implicit
>> symbolic extension function, so the two operands of SLT do not require
>
> more naturally: "and add.w implicitly sign-extends" -- brevity are
> often desired and clearer ;-)
Sorry I'll revise it soon!
Thanks!:-)
>
>> symbolic expansion.
>>
>> gcc/ChangeLog:
>>
>> * config/loongarch/loongarch.cc (loongarch_expand_conditional_move):
>> Optimize the function implementation.
>>
>> gcc/testsuite/ChangeLog:
>>
>> * gcc.target/loongarch/slt-sign-extend.c: New test.
>> ---
>> gcc/config/loongarch/loongarch.cc | 53 +++++++++++++++++--
>> .../gcc.target/loongarch/slt-sign-extend.c | 14 +++++
>> 2 files changed, 63 insertions(+), 4 deletions(-)
>> create mode 100644
>> gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
>>
>> diff --git a/gcc/config/loongarch/loongarch.cc
>> b/gcc/config/loongarch/loongarch.cc
>> index 86d58784113..1905599b9e8 100644
>> --- a/gcc/config/loongarch/loongarch.cc
>> +++ b/gcc/config/loongarch/loongarch.cc
>> @@ -4384,14 +4384,30 @@ loongarch_expand_conditional_move (rtx
>> *operands)
>> enum rtx_code code = GET_CODE (operands[1]);
>> rtx op0 = XEXP (operands[1], 0);
>> rtx op1 = XEXP (operands[1], 1);
>> + rtx op0_extend = op0;
>> + rtx op1_extend = op1;
>> +
>> + /* Record whether operands[2] and operands[3] modes are promoted
>> to word_mode. */
>> + bool promote_p = false;
>> + machine_mode mode = GET_MODE (operands[0]);
>> if (FLOAT_MODE_P (GET_MODE (op1)))
>> loongarch_emit_float_compare (&code, &op0, &op1);
>> else
>> {
>> + if ((REGNO (op0) == REGNO (operands[2])
>> + || (REGNO (op1) == REGNO (operands[3]) && (op1 != const0_rtx)))
>> + && (GET_MODE_SIZE (GET_MODE (op0)) < word_mode))
>> + {
>> + mode = word_mode;
>> + promote_p = true;
>> + }
>> +
>> loongarch_extend_comparands (code, &op0, &op1);
>> op0 = force_reg (word_mode, op0);
>> + op0_extend = op0;
>> + op1_extend = force_reg (word_mode, op1);
>> if (code == EQ || code == NE)
>> {
>> @@ -4418,23 +4434,52 @@ loongarch_expand_conditional_move (rtx
>> *operands)
>> && register_operand (operands[2], VOIDmode)
>> && register_operand (operands[3], VOIDmode))
>> {
>> - machine_mode mode = GET_MODE (operands[0]);
>> + rtx op2 = operands[2];
>> + rtx op3 = operands[3];
>> +
>> + if (promote_p)
>> + {
>> + if (REGNO (XEXP (operands[1], 0)) == REGNO (operands[2]))
>> + op2 = op0_extend;
>> + else
>> + {
>> + loongarch_extend_comparands (code, &op2, &const0_rtx);
>> + op2 = force_reg (mode, op2);
>> + }
>> +
>> + if (REGNO (XEXP (operands[1], 1)) == REGNO (operands[3]))
>> + op3 = op1_extend;
>> + else
>> + {
>> + loongarch_extend_comparands (code, &op3, &const0_rtx);
>> + op3 = force_reg (mode, op3);
>> + }
>> + }
>> +
>> rtx temp = gen_reg_rtx (mode);
>> rtx temp2 = gen_reg_rtx (mode);
>> emit_insn (gen_rtx_SET (temp,
>> gen_rtx_IF_THEN_ELSE (mode, cond,
>> - operands[2], const0_rtx)));
>> + op2, const0_rtx)));
>> /* Flip the test for the second operand. */
>> cond = gen_rtx_fmt_ee ((code == EQ) ? NE : EQ, GET_MODE
>> (op0), op0, op1);
>> emit_insn (gen_rtx_SET (temp2,
>> gen_rtx_IF_THEN_ELSE (mode, cond,
>> - operands[3], const0_rtx)));
>> + op3, const0_rtx)));
>> /* Merge the two results, at least one is guaranteed to be
>> zero. */
>> - emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp,
>> temp2)));
>> + if (promote_p)
>> + {
>> + rtx temp3 = gen_reg_rtx (mode);
>> + emit_insn (gen_rtx_SET (temp3, gen_rtx_IOR (mode, temp, temp2)));
>> + temp3 = gen_lowpart (GET_MODE (operands[0]), temp3);
>> + loongarch_emit_move (operands[0], temp3);
>> + }
>> + else
>> + emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp,
>> temp2)));
>> }
>> else
>> emit_insn (gen_rtx_SET (operands[0],
>> diff --git a/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
>> b/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
>> new file mode 100644
>> index 00000000000..3863db79aaf
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
>> @@ -0,0 +1,14 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-mabi=lp64d -O2" } */
>> +/* { dg-final { scan-assembler-not "slli.w" } } */
>> +
>> +extern int src1, src2, src3;
>> +
>> +int
>> +test (void)
>> +{
>> + int data1 = src1 + src2;
>> + int data2 = src1 + src3;
>> +
>> + return test1 > test2 ? test1 : test2;
>> +}
> Otherwise I think this is okay, and nice catch! ;-)
@@ -4384,14 +4384,30 @@ loongarch_expand_conditional_move (rtx *operands)
enum rtx_code code = GET_CODE (operands[1]);
rtx op0 = XEXP (operands[1], 0);
rtx op1 = XEXP (operands[1], 1);
+ rtx op0_extend = op0;
+ rtx op1_extend = op1;
+
+ /* Record whether operands[2] and operands[3] modes are promoted to word_mode. */
+ bool promote_p = false;
+ machine_mode mode = GET_MODE (operands[0]);
if (FLOAT_MODE_P (GET_MODE (op1)))
loongarch_emit_float_compare (&code, &op0, &op1);
else
{
+ if ((REGNO (op0) == REGNO (operands[2])
+ || (REGNO (op1) == REGNO (operands[3]) && (op1 != const0_rtx)))
+ && (GET_MODE_SIZE (GET_MODE (op0)) < word_mode))
+ {
+ mode = word_mode;
+ promote_p = true;
+ }
+
loongarch_extend_comparands (code, &op0, &op1);
op0 = force_reg (word_mode, op0);
+ op0_extend = op0;
+ op1_extend = force_reg (word_mode, op1);
if (code == EQ || code == NE)
{
@@ -4418,23 +4434,52 @@ loongarch_expand_conditional_move (rtx *operands)
&& register_operand (operands[2], VOIDmode)
&& register_operand (operands[3], VOIDmode))
{
- machine_mode mode = GET_MODE (operands[0]);
+ rtx op2 = operands[2];
+ rtx op3 = operands[3];
+
+ if (promote_p)
+ {
+ if (REGNO (XEXP (operands[1], 0)) == REGNO (operands[2]))
+ op2 = op0_extend;
+ else
+ {
+ loongarch_extend_comparands (code, &op2, &const0_rtx);
+ op2 = force_reg (mode, op2);
+ }
+
+ if (REGNO (XEXP (operands[1], 1)) == REGNO (operands[3]))
+ op3 = op1_extend;
+ else
+ {
+ loongarch_extend_comparands (code, &op3, &const0_rtx);
+ op3 = force_reg (mode, op3);
+ }
+ }
+
rtx temp = gen_reg_rtx (mode);
rtx temp2 = gen_reg_rtx (mode);
emit_insn (gen_rtx_SET (temp,
gen_rtx_IF_THEN_ELSE (mode, cond,
- operands[2], const0_rtx)));
+ op2, const0_rtx)));
/* Flip the test for the second operand. */
cond = gen_rtx_fmt_ee ((code == EQ) ? NE : EQ, GET_MODE (op0), op0, op1);
emit_insn (gen_rtx_SET (temp2,
gen_rtx_IF_THEN_ELSE (mode, cond,
- operands[3], const0_rtx)));
+ op3, const0_rtx)));
/* Merge the two results, at least one is guaranteed to be zero. */
- emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2)));
+ if (promote_p)
+ {
+ rtx temp3 = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (temp3, gen_rtx_IOR (mode, temp, temp2)));
+ temp3 = gen_lowpart (GET_MODE (operands[0]), temp3);
+ loongarch_emit_move (operands[0], temp3);
+ }
+ else
+ emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2)));
}
else
emit_insn (gen_rtx_SET (operands[0],
new file mode 100644
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -O2" } */
+/* { dg-final { scan-assembler-not "slli.w" } } */
+
+extern int src1, src2, src3;
+
+int
+test (void)
+{
+ int data1 = src1 + src2;
+ int data2 = src1 + src3;
+
+ return data1 > data2 ? data1 : data2;
+}