[2/3] LoongArch: Redundant sign extension elimination optimization.

Message ID 20240106085409.25985-2-chenglulu@loongson.cn
State Unresolved
Headers
Series [1/3] LoongArch: Optimized some of the symbolic expansion instructions generated during bitwise operations. |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

chenglulu Jan. 6, 2024, 8:54 a.m. UTC
  From: liwei <liwei@loongson.cn>

We found that the combine pass in GCC currently cannot eliminate the
redundant sign extension in an insn sequence such as the following:

(insn 77 76 78 5 (set (reg:SI 143)
	(plus:SI (subreg/s/u:SI (reg/v:DI 104 [ len ]) 0)
	    (const_int 1 [0x1]))) {addsi3}
    (expr_list:REG_DEAD (reg/v:DI 104 [ len ])
	(nil)))
(insn 78 77 82 5 (set (reg/v:DI 104 [ len ])
	(sign_extend:DI (reg:SI 143))) {extendsidi2}
	(nil))

Because reg:SI 143 neither dies nor is set in insn 78, combine does not
merge this insn sequence. We adjusted the add template to eliminate
redundant sign extensions during the expand pass.

gcc/ChangeLog:

	* config/loongarch/loongarch.md (add<mode>3): Removed.
	(*addsi3): New.
	(addsi3): New.
	(adddi3): New.
	(*addsi3_extended): Removed.
	(addsi3_extended): New.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/sign-extend.c: Moved to...
	* gcc.target/loongarch/sign-extend-1.c: ...here.
	* gcc.target/loongarch/sign-extend-2.c: New test.
---
 gcc/config/loongarch/loongarch.md             | 93 ++++++++++++++++---
 .../{sign-extend.c => sign-extend-1.c}        |  0
 .../gcc.target/loongarch/sign-extend-2.c      | 59 ++++++++++++
 3 files changed, 137 insertions(+), 15 deletions(-)
 rename gcc/testsuite/gcc.target/loongarch/{sign-extend.c => sign-extend-1.c} (100%)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/sign-extend-2.c
  

Comments

Xi Ruoyao Jan. 6, 2024, 10:36 a.m. UTC | #1
On Sat, 2024-01-06 at 16:54 +0800, Lulu Cheng wrote:
> +(define_expand "addsi3"
> +  [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r")
> +	(plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r,r")
> +		 (match_operand:SI 2 "plus_si_operand"  "r,I,La,Le,Lb")))]
> +  ""
> +{
> +  if (TARGET_64BIT)

I think for 32-bit we can just skip the expand and use *addsi3?  I.e.
add TARGET_64BIT to the expand condition two lines above.

> +    {
> +      if (CONST_INT_P (operands[2]) && !IMM12_INT (operands[2])
> +	  && ADDU16I_OPERAND (INTVAL (operands[2])))
> +	{
> +	  rtx t1 = gen_reg_rtx (DImode);
> +	  rtx t2 = gen_reg_rtx (DImode);
> +	  rtx t3 = gen_reg_rtx (DImode);
> +	  emit_insn (gen_extend_insn (t1, operands[1], DImode, SImode, 0));
> +	  t2 = operands[2];
> +	  emit_insn (gen_adddi3 (t3, t1, t2));
> +	  t3 = gen_lowpart (SImode, t3);
> +	  emit_move_insn (operands[0], t3);
> +	  DONE;
> +	}
> +      else
> +	{
> +	  rtx t = gen_reg_rtx (DImode);
> +	  emit_insn (gen_addsi3_extended (t, operands[1], operands[2]));

AFAIK if !TARGET_64BIT a DImode value should actually be a pair of hardware
registers, but addsi3_extended doesn't output such a pair, so this seems
invalid...

> +	  t = gen_lowpart (SImode, t);
> +	  SUBREG_PROMOTED_VAR_P (t) = 1;
> +	  SUBREG_PROMOTED_SET (t, SRP_SIGNED);
> +	  emit_move_insn (operands[0], t);
> +	  DONE;
> +	}
> +    }
> +})
  

Patch

diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 436b9a93235..17ec401f535 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -657,15 +657,15 @@  (define_insn "add<mode>3"
   [(set_attr "type" "fadd")
    (set_attr "mode" "<UNITMODE>")])
 
-(define_insn_and_split "add<mode>3"
-  [(set (match_operand:GPR 0 "register_operand" "=r,r,r,r,r,r,r")
-	(plus:GPR (match_operand:GPR 1 "register_operand" "r,r,r,r,r,r,r")
-		  (match_operand:GPR 2 "plus_<mode>_operand"
+(define_insn_and_split "*addsi3"
+  [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r")
+	(plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r,r,r,r")
+		  (match_operand:SI 2 "plus_si_operand"
 				       "r,I,La,Lb,Lc,Ld,Le")))]
   ""
   "@
-   add.<d>\t%0,%1,%2
-   addi.<d>\t%0,%1,%2
+   add.w\t%0,%1,%2
+   addi.w\t%0,%1,%2
    #
    * operands[2] = GEN_INT (INTVAL (operands[2]) / 65536); \
      return \"addu16i.d\t%0,%1,%2\";
@@ -674,25 +674,88 @@  (define_insn_and_split "add<mode>3"
    #"
   "CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) \
    && !ADDU16I_OPERAND (INTVAL (operands[2]))"
-  [(set (match_dup 0) (plus:GPR (match_dup 1) (match_dup 3)))
-   (set (match_dup 0) (plus:GPR (match_dup 0) (match_dup 4)))]
+  [(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 3)))
+   (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 4)))]
   {
-    loongarch_split_plus_constant (&operands[2], <MODE>mode);
+    loongarch_split_plus_constant (&operands[2], SImode);
   }
   [(set_attr "alu_type" "add")
-   (set_attr "mode" "<MODE>")
+   (set_attr "mode" "SI")
    (set_attr "insn_count" "1,1,2,1,2,2,2")
    (set (attr "enabled")
       (cond
-	[(match_test "<MODE>mode != DImode && which_alternative == 4")
+	[(match_test "which_alternative == 4")
 	 (const_string "no")
-	 (match_test "<MODE>mode != DImode && which_alternative == 5")
-	 (const_string "no")
-	 (match_test "<MODE>mode != SImode && which_alternative == 6")
+	 (match_test "which_alternative == 5")
+	 (const_string "no")]
+	(const_string "yes")))])
+
+(define_expand "addsi3"
+  [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r")
+	(plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r,r")
+		 (match_operand:SI 2 "plus_si_operand"  "r,I,La,Le,Lb")))]
+  ""
+{
+  if (TARGET_64BIT)
+    {
+      if (CONST_INT_P (operands[2]) && !IMM12_INT (operands[2])
+	  && ADDU16I_OPERAND (INTVAL (operands[2])))
+	{
+	  rtx t1 = gen_reg_rtx (DImode);
+	  rtx t2 = gen_reg_rtx (DImode);
+	  rtx t3 = gen_reg_rtx (DImode);
+	  emit_insn (gen_extend_insn (t1, operands[1], DImode, SImode, 0));
+	  t2 = operands[2];
+	  emit_insn (gen_adddi3 (t3, t1, t2));
+	  t3 = gen_lowpart (SImode, t3);
+	  emit_move_insn (operands[0], t3);
+	  DONE;
+	}
+      else
+	{
+	  rtx t = gen_reg_rtx (DImode);
+	  emit_insn (gen_addsi3_extended (t, operands[1], operands[2]));
+	  t = gen_lowpart (SImode, t);
+	  SUBREG_PROMOTED_VAR_P (t) = 1;
+	  SUBREG_PROMOTED_SET (t, SRP_SIGNED);
+	  emit_move_insn (operands[0], t);
+	  DONE;
+	}
+    }
+})
+
+(define_insn_and_split "adddi3"
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r,r,r")
+	(plus:DI (match_operand:DI 1 "register_operand" "r,r,r,r,r,r,r")
+		  (match_operand:DI 2 "plus_di_operand"
+				       "r,I,La,Lb,Lc,Ld,Le")))]
+  ""
+  "@
+   add.d\t%0,%1,%2
+   addi.d\t%0,%1,%2
+   #
+   * operands[2] = GEN_INT (INTVAL (operands[2]) / 65536); \
+     return \"addu16i.d\t%0,%1,%2\";
+   #
+   #
+   #"
+  "CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) \
+   && !ADDU16I_OPERAND (INTVAL (operands[2]))"
+  [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 3)))
+   (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))]
+  {
+    loongarch_split_plus_constant (&operands[2], DImode);
+  }
+  [(set_attr "alu_type" "add")
+   (set_attr "mode" "DI")
+   (set_attr "insn_count" "1,1,2,1,2,2,2")
+   (set (attr "enabled")
+      (cond
+	[(match_test "which_alternative == 6")
 	 (const_string "no")]
 	(const_string "yes")))])
 
-(define_insn_and_split "*addsi3_extended"
+(define_insn_and_split "addsi3_extended"
   [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
 	(sign_extend:DI
 	     (plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r")
diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend.c b/gcc/testsuite/gcc.target/loongarch/sign-extend-1.c
similarity index 100%
rename from gcc/testsuite/gcc.target/loongarch/sign-extend.c
rename to gcc/testsuite/gcc.target/loongarch/sign-extend-1.c
diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c
new file mode 100644
index 00000000000..a45dde4f73f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c
@@ -0,0 +1,59 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -O2" } */
+/* { dg-final { scan-assembler-times "slli.w\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,0" 1 } } */
+
+#include <stdint.h>
+#define my_min(x, y) ((x) < (y) ? (x) : (y))
+
+void
+bt_skip_func (const uint32_t len_limit, const uint32_t pos,
+              const uint8_t *const cur, uint32_t cur_match,
+              uint32_t *const son, const uint32_t cyclic_pos,
+              const uint32_t cyclic_size)
+{
+  uint32_t *ptr0 = son + (cyclic_pos << 1) + 1;
+  uint32_t *ptr1 = son + (cyclic_pos << 1);
+
+  uint32_t len0 = 0;
+  uint32_t len1 = 0;
+
+  while (1)
+    {
+      const uint32_t delta = pos - cur_match;
+      uint32_t *pair
+          = son
+            + ((cyclic_pos - delta + (delta > cyclic_pos ? cyclic_size : 0))
+               << 1);
+      const uint8_t *pb = cur - delta;
+      uint32_t len = my_min (len0, len1);
+
+      if (pb[len] == cur[len])
+        {
+          while (++len != len_limit)
+            if (pb[len] != cur[len])
+              break;
+
+          if (len == len_limit)
+            {
+              *ptr1 = pair[0];
+              *ptr0 = pair[1];
+              return;
+            }
+        }
+
+      if (pb[len] < cur[len])
+        {
+          *ptr1 = cur_match;
+          ptr1 = pair + 1;
+          cur_match = *ptr1;
+          len1 = len;
+        }
+      else
+        {
+          *ptr0 = cur_match;
+          ptr0 = pair;
+          cur_match = *ptr0;
+          len0 = len;
+        }
+    }
+}