[v2] LoongArch: Optimize the loading of immediate numbers with the same high and low 32-bit values

Message ID 20231123030417.29993-1-guojie@loongson.cn
State Accepted
Headers
Series [v2] LoongArch: Optimize the loading of immediate numbers with the same high and low 32-bit values

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

Guo Jie Nov. 23, 2023, 3:04 a.m. UTC
  For the following immediate load operation in gcc/testsuite/gcc.target/loongarch/imm-load1.c:

	long long r = 0x0101010101010101;

Before this patch:

	lu12i.w	    $r15,16842752>>12
	ori	    $r15,$r15,257
	lu32i.d	    $r15,0x1010100000000>>32
	lu52i.d	    $r15,$r15,0x100000000000000>>52

After this patch:

	lu12i.w     $r15,16842752>>12
	ori         $r15,$r15,257
	bstrins.d   $r15,$r15,63,32

gcc/ChangeLog:

	* config/loongarch/loongarch.cc
	(enum loongarch_load_imm_method): Add new method.
	(loongarch_build_integer): Add relevant implementations for
	new method.
	(loongarch_move_integer): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/imm-load1.c: Change old check.

---
Update in v2:
	1. Correct the format of the ChangeLog.
	2. Avoid left-shifting a negative value in loongarch_build_integer.

---
 gcc/config/loongarch/loongarch.cc             | 22 ++++++++++++++++++-
 .../gcc.target/loongarch/imm-load1.c          |  3 ++-
 2 files changed, 23 insertions(+), 2 deletions(-)
  

Comments

Xi Ruoyao Nov. 23, 2023, 3:28 a.m. UTC | #1
On Thu, 2023-11-23 at 11:04 +0800, Guo Jie wrote:
> For the following immediate load operation in gcc/testsuite/gcc.target/loongarch/imm-load1.c:
> 
> 	long long r = 0x0101010101010101;
> 
> Before this patch:
> 
> 	lu12i.w	    $r15,16842752>>12
> 	ori	    $r15,$r15,257
> 	lu32i.d	    $r15,0x1010100000000>>32
> 	lu52i.d	    $r15,$r15,0x100000000000000>>52
> 
> After this patch:
> 
> 	lu12i.w     $r15,16842752>>12
> 	ori         $r15,$r15,257
> 	bstrins.d   $r15,$r15,63,32
> 
> gcc/ChangeLog:
> 
> 	* config/loongarch/loongarch.cc
> 	(enum loongarch_load_imm_method): Add new method.
> 	(loongarch_build_integer): Add relevant implementations for
> 	new method.
> 	(loongarch_move_integer): Ditto.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/loongarch/imm-load1.c: Change old check.
> 
> ---
> Update in v2:
> 	1. Correct the format of ChangeLog.
> 	2. Avoid left shift of negative value in loongarch_build_integer.

LGTM.

> 
> ---
>  gcc/config/loongarch/loongarch.cc             | 22 ++++++++++++++++++-
>  .../gcc.target/loongarch/imm-load1.c          |  3 ++-
>  2 files changed, 23 insertions(+), 2 deletions(-)
> 
> diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
> index d05743bec87..f95507e2348 100644
> --- a/gcc/config/loongarch/loongarch.cc
> +++ b/gcc/config/loongarch/loongarch.cc
> @@ -142,12 +142,16 @@ struct loongarch_address_info
>  
>     METHOD_LU52I:
>       Load 52-63 bit of the immediate number.
> +
> +   METHOD_MIRROR:
> +     Copy 0-31 bit of the immediate number to 32-63bit.
>  */
>  enum loongarch_load_imm_method
>  {
>    METHOD_NORMAL,
>    METHOD_LU32I,
> -  METHOD_LU52I
> +  METHOD_LU52I,
> +  METHOD_MIRROR
>  };
>  
>  struct loongarch_integer_op
> @@ -1556,11 +1560,23 @@ loongarch_build_integer (struct loongarch_integer_op *codes,
>  
>        int sign31 = (value & (HOST_WIDE_INT_1U << 31)) >> 31;
>        int sign51 = (value & (HOST_WIDE_INT_1U << 51)) >> 51;
> +
> +      uint32_t hival = (uint32_t) (value >> 32);
> +      uint32_t loval = (uint32_t) value;
> +
>        /* Determine whether the upper 32 bits are sign-extended from the lower
>  	 32 bits. If it is, the instructions to load the high order can be
>  	 ommitted.  */
>        if (lu32i[sign31] && lu52i[sign31])
>  	return cost;
> +      /* If the lower 32 bits are the same as the upper 32 bits, just copy
> +	 the lower 32 bits to the upper 32 bits.  */
> +      else if (loval == hival)
> +	{
> +	  codes[cost].method = METHOD_MIRROR;
> +	  codes[cost].curr_value = value;
> +	  return cost + 1;
> +	}
>        /* Determine whether bits 32-51 are sign-extended from the lower 32
>  	 bits. If so, directly load 52-63 bits.  */
>        else if (lu32i[sign31])
> @@ -3230,6 +3246,10 @@ loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value)
>  			   gen_rtx_AND (DImode, x, GEN_INT (0xfffffffffffff)),
>  			   GEN_INT (codes[i].value));
>  	  break;
> +	case METHOD_MIRROR:
> +	  gcc_assert (mode == DImode);
> +	  emit_insn (gen_insvdi (x, GEN_INT (32), GEN_INT (32), x));
> +	  break;
>  	default:
>  	  gcc_unreachable ();
>  	}
> diff --git a/gcc/testsuite/gcc.target/loongarch/imm-load1.c b/gcc/testsuite/gcc.target/loongarch/imm-load1.c
> index 2ff02971239..f64cc2956a3 100644
> --- a/gcc/testsuite/gcc.target/loongarch/imm-load1.c
> +++ b/gcc/testsuite/gcc.target/loongarch/imm-load1.c
> @@ -1,6 +1,7 @@
>  /* { dg-do compile } */
>  /* { dg-options "-mabi=lp64d -O2" } */
> -/* { dg-final { scan-assembler "test:.*lu52i\.d.*\n\taddi\.w.*\n\.L2:" } } */
> +/* { dg-final { scan-assembler-not "test:.*lu52i\.d.*\n\taddi\.w.*\n\.L2:" } } */
> +/* { dg-final { scan-assembler "test:.*lu12i\.w.*\n\tbstrins\.d.*\n\.L2:" } } */
>  
>  
>  extern long long b[10];
  

Patch

diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index d05743bec87..f95507e2348 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -142,12 +142,16 @@  struct loongarch_address_info
 
    METHOD_LU52I:
      Load 52-63 bit of the immediate number.
+
+   METHOD_MIRROR:
+     Copy 0-31 bit of the immediate number to 32-63bit.
 */
 enum loongarch_load_imm_method
 {
   METHOD_NORMAL,
   METHOD_LU32I,
-  METHOD_LU52I
+  METHOD_LU52I,
+  METHOD_MIRROR
 };
 
 struct loongarch_integer_op
@@ -1556,11 +1560,23 @@  loongarch_build_integer (struct loongarch_integer_op *codes,
 
       int sign31 = (value & (HOST_WIDE_INT_1U << 31)) >> 31;
       int sign51 = (value & (HOST_WIDE_INT_1U << 51)) >> 51;
+
+      uint32_t hival = (uint32_t) (value >> 32);
+      uint32_t loval = (uint32_t) value;
+
       /* Determine whether the upper 32 bits are sign-extended from the lower
 	 32 bits. If it is, the instructions to load the high order can be
 	 ommitted.  */
       if (lu32i[sign31] && lu52i[sign31])
 	return cost;
+      /* If the lower 32 bits are the same as the upper 32 bits, just copy
+	 the lower 32 bits to the upper 32 bits.  */
+      else if (loval == hival)
+	{
+	  codes[cost].method = METHOD_MIRROR;
+	  codes[cost].curr_value = value;
+	  return cost + 1;
+	}
       /* Determine whether bits 32-51 are sign-extended from the lower 32
 	 bits. If so, directly load 52-63 bits.  */
       else if (lu32i[sign31])
@@ -3230,6 +3246,10 @@  loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value)
 			   gen_rtx_AND (DImode, x, GEN_INT (0xfffffffffffff)),
 			   GEN_INT (codes[i].value));
 	  break;
+	case METHOD_MIRROR:
+	  gcc_assert (mode == DImode);
+	  emit_insn (gen_insvdi (x, GEN_INT (32), GEN_INT (32), x));
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
diff --git a/gcc/testsuite/gcc.target/loongarch/imm-load1.c b/gcc/testsuite/gcc.target/loongarch/imm-load1.c
index 2ff02971239..f64cc2956a3 100644
--- a/gcc/testsuite/gcc.target/loongarch/imm-load1.c
+++ b/gcc/testsuite/gcc.target/loongarch/imm-load1.c
@@ -1,6 +1,7 @@ 
 /* { dg-do compile } */
 /* { dg-options "-mabi=lp64d -O2" } */
-/* { dg-final { scan-assembler "test:.*lu52i\.d.*\n\taddi\.w.*\n\.L2:" } } */
+/* { dg-final { scan-assembler-not "test:.*lu52i\.d.*\n\taddi\.w.*\n\.L2:" } } */
+/* { dg-final { scan-assembler "test:.*lu12i\.w.*\n\tbstrins\.d.*\n\.L2:" } } */
 
 
 extern long long b[10];