[v1] LoongArch: Remove the symbolic extension instruction due to the SLT directive.

Message ID 20230825040156.9209-1-chenglulu@loongson.cn
State Accepted
Headers
Series [v1] LoongArch: Remove the symbolic extension instruction due to the SLT directive. |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

chenglulu Aug. 25, 2023, 4:01 a.m. UTC
  Since the slt instruction does not distinguish between 32-bit and 64-bit operations
under the LoongArch 64-bit architecture, if the operands of slt are of SImode, they
must be sign-extended before the operation.

However, in cases like the following test case, the sign extension can be omitted:

	extern int src1, src2, src3;

	int
	test (void)
	{
	  int data1 = src1 + src2;
	  int data2 = src1 + src3;
	  return data1 > data2 ? data1 : data2;
	}
Assembly code before optimization:
 	...
	add.w	$r4,$r4,$r14
	add.w	$r13,$r13,$r14
	slli.w	$r12,$r4,0
	slli.w	$r14,$r13,0
	slt	$r12,$r12,$r14
	masknez	$r4,$r4,$r12
	maskeqz	$r12,$r13,$r12
	or	$r4,$r4,$r12
	slli.w	$r4,$r4,0
	...

After optimization:
	...
	add.w	$r12,$r12,$r14
	add.w	$r13,$r13,$r14
	slt	$r4,$r12,$r13
	masknez	$r12,$r12,$r4
	maskeqz	$r4,$r13,$r4
	or	$r4,$r12,$r4
	...

In this test case, both operands of slt are produced by an addition, and
add.w implicitly sign-extends its result, so the two operands of slt do not
need to be sign-extended again.

gcc/ChangeLog:

	* config/loongarch/loongarch.cc (loongarch_expand_conditional_move):
	Optimize the function implementation.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/slt-sign-extend.c: New test.
---
 gcc/config/loongarch/loongarch.cc             | 53 +++++++++++++++++--
 .../gcc.target/loongarch/slt-sign-extend.c    | 14 +++++
 2 files changed, 63 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
  

Comments

WANG Xuerui Aug. 25, 2023, 4:16 a.m. UTC | #1
On 8/25/23 12:01, Lulu Cheng wrote:
> Since the slt instruction does not distinguish between 32-bit and 64-bit operations
> under the LoongArch 64-bit architecture, if the operands of slt are of SImode, symbol
> expansion is required before operation.
Hint:“符号扩展” is "sign extension" (as noun) or "sign-extend" (as verb), 
not "symbol expansion".
>
> But similar to the following test case, symbol expansion can be omitted:
>
> 	extern int src1, src2, src3;
>
> 	int
> 	test (void)
> 	{
> 	  int data1 = src1 + src2;
> 	  int data2 = src1 + src3;
> 	  return test1 > test2 ? test1 : test2;
> 	}
> Assembly code before optimization:
>   	...
> 	add.w	$r4,$r4,$r14
> 	add.w	$r13,$r13,$r14
> 	slli.w	$r12,$r4,0
> 	slli.w	$r14,$r13,0
> 	slt	$r12,$r12,$r14
> 	masknez	$r4,$r4,$r12
> 	maskeqz	$r12,$r13,$r12
> 	or	$r4,$r4,$r12
> 	slli.w	$r4,$r4,0
> 	...
>
> After optimization:
> 	...
> 	add.w	$r12,$r12,$r14
> 	add.w	$r13,$r13,$r14
> 	slt	$r4,$r12,$r13
> 	masknez	$r12,$r12,$r4
> 	maskeqz	$r4,$r13,$r4
> 	or	$r4,$r12,$r4
> 	...
>
> Similar to this test example, the two operands of SLT are obtained by the
> addition operation, and the addition operation "add.w" is an implicit
> symbolic extension function, so the two operands of SLT do not require

more naturally: "and add.w implicitly sign-extends" -- brevity is often 
desired and clearer ;-)

> symbolic expansion.
>
> gcc/ChangeLog:
>
> 	* config/loongarch/loongarch.cc (loongarch_expand_conditional_move):
> 	Optimize the function implementation.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/loongarch/slt-sign-extend.c: New test.
> ---
>   gcc/config/loongarch/loongarch.cc             | 53 +++++++++++++++++--
>   .../gcc.target/loongarch/slt-sign-extend.c    | 14 +++++
>   2 files changed, 63 insertions(+), 4 deletions(-)
>   create mode 100644 gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
>
> diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
> index 86d58784113..1905599b9e8 100644
> --- a/gcc/config/loongarch/loongarch.cc
> +++ b/gcc/config/loongarch/loongarch.cc
> @@ -4384,14 +4384,30 @@ loongarch_expand_conditional_move (rtx *operands)
>     enum rtx_code code = GET_CODE (operands[1]);
>     rtx op0 = XEXP (operands[1], 0);
>     rtx op1 = XEXP (operands[1], 1);
> +  rtx op0_extend = op0;
> +  rtx op1_extend = op1;
> +
> +  /* Record whether operands[2] and operands[3] modes are promoted to word_mode.  */
> +  bool promote_p = false;
> +  machine_mode mode = GET_MODE (operands[0]);
>   
>     if (FLOAT_MODE_P (GET_MODE (op1)))
>       loongarch_emit_float_compare (&code, &op0, &op1);
>     else
>       {
> +      if ((REGNO (op0) == REGNO (operands[2])
> +	   || (REGNO (op1) == REGNO (operands[3]) && (op1 != const0_rtx)))
> +	  && (GET_MODE_SIZE (GET_MODE (op0)) < word_mode))
> +	{
> +	  mode = word_mode;
> +	  promote_p = true;
> +	}
> +
>         loongarch_extend_comparands (code, &op0, &op1);
>   
>         op0 = force_reg (word_mode, op0);
> +      op0_extend = op0;
> +      op1_extend = force_reg (word_mode, op1);
>   
>         if (code == EQ || code == NE)
>   	{
> @@ -4418,23 +4434,52 @@ loongarch_expand_conditional_move (rtx *operands)
>         && register_operand (operands[2], VOIDmode)
>         && register_operand (operands[3], VOIDmode))
>       {
> -      machine_mode mode = GET_MODE (operands[0]);
> +      rtx op2 = operands[2];
> +      rtx op3 = operands[3];
> +
> +      if (promote_p)
> +	{
> +	  if (REGNO (XEXP (operands[1], 0)) == REGNO (operands[2]))
> +	    op2 = op0_extend;
> +	  else
> +	    {
> +	      loongarch_extend_comparands (code, &op2, &const0_rtx);
> +	      op2 = force_reg (mode, op2);
> +	    }
> +
> +	  if (REGNO (XEXP (operands[1], 1)) == REGNO (operands[3]))
> +	    op3 = op1_extend;
> +	  else
> +	    {
> +	      loongarch_extend_comparands (code, &op3, &const0_rtx);
> +	      op3 = force_reg (mode, op3);
> +	    }
> +	}
> +
>         rtx temp = gen_reg_rtx (mode);
>         rtx temp2 = gen_reg_rtx (mode);
>   
>         emit_insn (gen_rtx_SET (temp,
>   			      gen_rtx_IF_THEN_ELSE (mode, cond,
> -						    operands[2], const0_rtx)));
> +						    op2, const0_rtx)));
>   
>         /* Flip the test for the second operand.  */
>         cond = gen_rtx_fmt_ee ((code == EQ) ? NE : EQ, GET_MODE (op0), op0, op1);
>   
>         emit_insn (gen_rtx_SET (temp2,
>   			      gen_rtx_IF_THEN_ELSE (mode, cond,
> -						    operands[3], const0_rtx)));
> +						    op3, const0_rtx)));
>   
>         /* Merge the two results, at least one is guaranteed to be zero.  */
> -      emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2)));
> +      if (promote_p)
> +	{
> +	  rtx temp3 = gen_reg_rtx (mode);
> +	  emit_insn (gen_rtx_SET (temp3, gen_rtx_IOR (mode, temp, temp2)));
> +	  temp3 = gen_lowpart (GET_MODE (operands[0]), temp3);
> +	  loongarch_emit_move (operands[0], temp3);
> +	}
> +      else
> +	emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2)));
>       }
>     else
>       emit_insn (gen_rtx_SET (operands[0],
> diff --git a/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c b/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
> new file mode 100644
> index 00000000000..3863db79aaf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mabi=lp64d -O2" } */
> +/* { dg-final { scan-assembler-not "slli.w" } } */
> +
> +extern int src1, src2, src3;
> +
> +int
> +test (void)
> +{
> +  int data1 = src1 + src2;
> +  int data2 = src1 + src3;
> +
> +  return test1 > test2 ? test1 : test2;
> +}
Otherwise I think this is okay, and nice catch! ;-)
  
chenglulu Aug. 25, 2023, 6:05 a.m. UTC | #2
在 2023/8/25 下午12:16, WANG Xuerui 写道:
> On 8/25/23 12:01, Lulu Cheng wrote:
>> Since the slt instruction does not distinguish between 32-bit and 
>> 64-bit operations
>> under the LoongArch 64-bit architecture, if the operands of slt are 
>> of SImode, symbol
>> expansion is required before operation.
> Hint:“符号扩展” is "sign extension" (as noun) or "sign-extend" (as verb), 
> not "symbol expansion".
>>
>> But similar to the following test case, symbol expansion can be omitted:
>>
>>     extern int src1, src2, src3;
>>
>>     int
>>     test (void)
>>     {
>>       int data1 = src1 + src2;
>>       int data2 = src1 + src3;
>>       return test1 > test2 ? test1 : test2;
>>     }
>> Assembly code before optimization:
>>       ...
>>     add.w    $r4,$r4,$r14
>>     add.w    $r13,$r13,$r14
>>     slli.w    $r12,$r4,0
>>     slli.w    $r14,$r13,0
>>     slt    $r12,$r12,$r14
>>     masknez    $r4,$r4,$r12
>>     maskeqz    $r12,$r13,$r12
>>     or    $r4,$r4,$r12
>>     slli.w    $r4,$r4,0
>>     ...
>>
>> After optimization:
>>     ...
>>     add.w    $r12,$r12,$r14
>>     add.w    $r13,$r13,$r14
>>     slt    $r4,$r12,$r13
>>     masknez    $r12,$r12,$r4
>>     maskeqz    $r4,$r13,$r4
>>     or    $r4,$r12,$r4
>>     ...
>>
>> Similar to this test example, the two operands of SLT are obtained by 
>> the
>> addition operation, and the addition operation "add.w" is an implicit
>> symbolic extension function, so the two operands of SLT do not require
>
> more naturally: "and add.w implicitly sign-extends" -- brevity are 
> often desired and clearer ;-)

Sorry I'll revise it soon!

Thanks!:-)

>
>> symbolic expansion.
>>
>> gcc/ChangeLog:
>>
>>     * config/loongarch/loongarch.cc (loongarch_expand_conditional_move):
>>     Optimize the function implementation.
>>
>> gcc/testsuite/ChangeLog:
>>
>>     * gcc.target/loongarch/slt-sign-extend.c: New test.
>> ---
>>   gcc/config/loongarch/loongarch.cc             | 53 +++++++++++++++++--
>>   .../gcc.target/loongarch/slt-sign-extend.c    | 14 +++++
>>   2 files changed, 63 insertions(+), 4 deletions(-)
>>   create mode 100644 
>> gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
>>
>> diff --git a/gcc/config/loongarch/loongarch.cc 
>> b/gcc/config/loongarch/loongarch.cc
>> index 86d58784113..1905599b9e8 100644
>> --- a/gcc/config/loongarch/loongarch.cc
>> +++ b/gcc/config/loongarch/loongarch.cc
>> @@ -4384,14 +4384,30 @@ loongarch_expand_conditional_move (rtx 
>> *operands)
>>     enum rtx_code code = GET_CODE (operands[1]);
>>     rtx op0 = XEXP (operands[1], 0);
>>     rtx op1 = XEXP (operands[1], 1);
>> +  rtx op0_extend = op0;
>> +  rtx op1_extend = op1;
>> +
>> +  /* Record whether operands[2] and operands[3] modes are promoted 
>> to word_mode.  */
>> +  bool promote_p = false;
>> +  machine_mode mode = GET_MODE (operands[0]);
>>       if (FLOAT_MODE_P (GET_MODE (op1)))
>>       loongarch_emit_float_compare (&code, &op0, &op1);
>>     else
>>       {
>> +      if ((REGNO (op0) == REGNO (operands[2])
>> +       || (REGNO (op1) == REGNO (operands[3]) && (op1 != const0_rtx)))
>> +      && (GET_MODE_SIZE (GET_MODE (op0)) < word_mode))
>> +    {
>> +      mode = word_mode;
>> +      promote_p = true;
>> +    }
>> +
>>         loongarch_extend_comparands (code, &op0, &op1);
>>           op0 = force_reg (word_mode, op0);
>> +      op0_extend = op0;
>> +      op1_extend = force_reg (word_mode, op1);
>>           if (code == EQ || code == NE)
>>       {
>> @@ -4418,23 +4434,52 @@ loongarch_expand_conditional_move (rtx 
>> *operands)
>>         && register_operand (operands[2], VOIDmode)
>>         && register_operand (operands[3], VOIDmode))
>>       {
>> -      machine_mode mode = GET_MODE (operands[0]);
>> +      rtx op2 = operands[2];
>> +      rtx op3 = operands[3];
>> +
>> +      if (promote_p)
>> +    {
>> +      if (REGNO (XEXP (operands[1], 0)) == REGNO (operands[2]))
>> +        op2 = op0_extend;
>> +      else
>> +        {
>> +          loongarch_extend_comparands (code, &op2, &const0_rtx);
>> +          op2 = force_reg (mode, op2);
>> +        }
>> +
>> +      if (REGNO (XEXP (operands[1], 1)) == REGNO (operands[3]))
>> +        op3 = op1_extend;
>> +      else
>> +        {
>> +          loongarch_extend_comparands (code, &op3, &const0_rtx);
>> +          op3 = force_reg (mode, op3);
>> +        }
>> +    }
>> +
>>         rtx temp = gen_reg_rtx (mode);
>>         rtx temp2 = gen_reg_rtx (mode);
>>           emit_insn (gen_rtx_SET (temp,
>>                     gen_rtx_IF_THEN_ELSE (mode, cond,
>> -                            operands[2], const0_rtx)));
>> +                            op2, const0_rtx)));
>>           /* Flip the test for the second operand.  */
>>         cond = gen_rtx_fmt_ee ((code == EQ) ? NE : EQ, GET_MODE 
>> (op0), op0, op1);
>>           emit_insn (gen_rtx_SET (temp2,
>>                     gen_rtx_IF_THEN_ELSE (mode, cond,
>> -                            operands[3], const0_rtx)));
>> +                            op3, const0_rtx)));
>>           /* Merge the two results, at least one is guaranteed to be 
>> zero.  */
>> -      emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, 
>> temp2)));
>> +      if (promote_p)
>> +    {
>> +      rtx temp3 = gen_reg_rtx (mode);
>> +      emit_insn (gen_rtx_SET (temp3, gen_rtx_IOR (mode, temp, temp2)));
>> +      temp3 = gen_lowpart (GET_MODE (operands[0]), temp3);
>> +      loongarch_emit_move (operands[0], temp3);
>> +    }
>> +      else
>> +    emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, 
>> temp2)));
>>       }
>>     else
>>       emit_insn (gen_rtx_SET (operands[0],
>> diff --git a/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c 
>> b/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
>> new file mode 100644
>> index 00000000000..3863db79aaf
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
>> @@ -0,0 +1,14 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-mabi=lp64d -O2" } */
>> +/* { dg-final { scan-assembler-not "slli.w" } } */
>> +
>> +extern int src1, src2, src3;
>> +
>> +int
>> +test (void)
>> +{
>> +  int data1 = src1 + src2;
>> +  int data2 = src1 + src3;
>> +
>> +  return test1 > test2 ? test1 : test2;
>> +}
> Otherwise I think this is okay, and nice catch! ;-)
  

Patch

diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 86d58784113..1905599b9e8 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4384,14 +4384,30 @@  loongarch_expand_conditional_move (rtx *operands)
   enum rtx_code code = GET_CODE (operands[1]);
   rtx op0 = XEXP (operands[1], 0);
   rtx op1 = XEXP (operands[1], 1);
+  rtx op0_extend = op0;
+  rtx op1_extend = op1;
+
+  /* Record whether operands[2] and operands[3] modes are promoted to word_mode.  */
+  bool promote_p = false;
+  machine_mode mode = GET_MODE (operands[0]);
 
   if (FLOAT_MODE_P (GET_MODE (op1)))
     loongarch_emit_float_compare (&code, &op0, &op1);
   else
     {
+      if ((REGNO (op0) == REGNO (operands[2])
+	   || (REGNO (op1) == REGNO (operands[3]) && (op1 != const0_rtx)))
+	  && (GET_MODE_SIZE (GET_MODE (op0)) < word_mode))
+	{
+	  mode = word_mode;
+	  promote_p = true;
+	}
+
       loongarch_extend_comparands (code, &op0, &op1);
 
       op0 = force_reg (word_mode, op0);
+      op0_extend = op0;
+      op1_extend = force_reg (word_mode, op1);
 
       if (code == EQ || code == NE)
 	{
@@ -4418,23 +4434,52 @@  loongarch_expand_conditional_move (rtx *operands)
       && register_operand (operands[2], VOIDmode)
       && register_operand (operands[3], VOIDmode))
     {
-      machine_mode mode = GET_MODE (operands[0]);
+      rtx op2 = operands[2];
+      rtx op3 = operands[3];
+
+      if (promote_p)
+	{
+	  if (REGNO (XEXP (operands[1], 0)) == REGNO (operands[2]))
+	    op2 = op0_extend;
+	  else
+	    {
+	      loongarch_extend_comparands (code, &op2, &const0_rtx);
+	      op2 = force_reg (mode, op2);
+	    }
+
+	  if (REGNO (XEXP (operands[1], 1)) == REGNO (operands[3]))
+	    op3 = op1_extend;
+	  else
+	    {
+	      loongarch_extend_comparands (code, &op3, &const0_rtx);
+	      op3 = force_reg (mode, op3);
+	    }
+	}
+
       rtx temp = gen_reg_rtx (mode);
       rtx temp2 = gen_reg_rtx (mode);
 
       emit_insn (gen_rtx_SET (temp,
 			      gen_rtx_IF_THEN_ELSE (mode, cond,
-						    operands[2], const0_rtx)));
+						    op2, const0_rtx)));
 
       /* Flip the test for the second operand.  */
       cond = gen_rtx_fmt_ee ((code == EQ) ? NE : EQ, GET_MODE (op0), op0, op1);
 
       emit_insn (gen_rtx_SET (temp2,
 			      gen_rtx_IF_THEN_ELSE (mode, cond,
-						    operands[3], const0_rtx)));
+						    op3, const0_rtx)));
 
       /* Merge the two results, at least one is guaranteed to be zero.  */
-      emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2)));
+      if (promote_p)
+	{
+	  rtx temp3 = gen_reg_rtx (mode);
+	  emit_insn (gen_rtx_SET (temp3, gen_rtx_IOR (mode, temp, temp2)));
+	  temp3 = gen_lowpart (GET_MODE (operands[0]), temp3);
+	  loongarch_emit_move (operands[0], temp3);
+	}
+      else
+	emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2)));
     }
   else
     emit_insn (gen_rtx_SET (operands[0],
diff --git a/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c b/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
new file mode 100644
index 00000000000..3863db79aaf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
@@ -0,0 +1,14 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -O2" } */
+/* { dg-final { scan-assembler-not "slli.w" } } */
+
+extern int src1, src2, src3;
+
+int
+test (void)
+{
+  int data1 = src1 + src2;
+  int data2 = src1 + src3;
+
+  return data1 > data2 ? data1 : data2;
+}