LoongArch: Optimize zero_extendqisi2 and zero_extendqidi2 patterns

Message ID 20240105073744.1800307-1-xujiahao@loongson.cn
State Accepted
Headers
Series LoongArch: Optimize zero_extendqisi2 and zero_extendqidi2 patterns |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

Jiahao Xu Jan. 5, 2024, 7:37 a.m. UTC
  For zero_extendqisi2 and zero_extendqidi2, use andi instead of bstrpick.w,
because andi is 6 times faster than bstrpick.w.

gcc/ChangeLog:

	* config/loongarch/loongarch.md:
	(zero_extend<SHORT:mode><GPR:mode>2): Rename to ..
	(zero_extendhi<GPR:mode>2): .. this, use hi.
	(zero_extendqihi2): Rename to ..
	(zero_extendqi<HWD:mode>2): .. this, and extend to HWD.
	(*zero_extend<GPR:mode>_trunc<SHORT:mode>): Rename to ..
	(*zero_extend<GPR:mode>_trunchi): .. this, use hi.
	(*zero_extendhi_truncqi): Rename to ..
	(*zero_extend<HWD:mode>_truncqi): .. this, and extend to HWD.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/zeroextend-qi.c: New test.
  

Comments

chenglulu Jan. 6, 2024, 8:42 a.m. UTC | #1
Hi, Jiahao:

  The latencies I measured for these two instructions are the same on
both the 3A5000 and the 3A6000.

The claimed speedup of andi over bstrpick.w needs to be confirmed again.

在 2024/1/5 下午3:37, Jiahao Xu 写道:
> For zero_extendqisi2 and zero_extendqidi2, use andi instead of bstrpick.w,
> because andi is 6 times faster than bstrpick.w.
>
> gcc/ChangeLog:
>
> 	* config/loongarch/loongarch.md:
> 	(zero_extend<SHORT:mode><GPR:mode>2): Rename to ..
> 	(zero_extendhi<GPR:mode>2): .. this, use hi.
> 	(zero_extendqihi2): Rename to ..
> 	(zero_extendqi<HWD:mode>2): .. this, and extend to HWD.
> 	(*zero_extend<GPR:mode>_trunc<SHORT:mode>): Rename to ..
> 	(*zero_extend<GPR:mode>_trunchi): .. this, use hi.
> 	(*zero_extendhi_truncqi): Rename to ..
> 	(*zero_extend<HWD:mode>_truncqi): .. this, and extend to HWD.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/loongarch/zeroextend-qi.c: New test.
>
> diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
> index d1f5b94f5d6..843dee77a60 100644
> --- a/gcc/config/loongarch/loongarch.md
> +++ b/gcc/config/loongarch/loongarch.md
> @@ -397,6 +397,9 @@
>   ;; Likewise the 64-bit truncate-and-shift patterns.
>   (define_mode_iterator SUBDI [QI HI SI])
>   
> +;; Scalar fixed point modes, excluding QI.
> +(define_mode_iterator HWD [HI SI (DI "TARGET_64BIT")])
> +
>   ;; Iterator for scalar fixed point modes.
>   (define_mode_iterator QHWD [QI HI SI (DI "TARGET_64BIT")])
>   
> @@ -1659,48 +1662,48 @@
>     [(set_attr "move_type" "arith,load,load,load")
>      (set_attr "mode" "DI")])
>   
> -(define_insn "zero_extend<SHORT:mode><GPR:mode>2"
> +(define_insn "zero_extendhi<GPR:mode>2"
>     [(set (match_operand:GPR 0 "register_operand" "=r,r,r")
>   	(zero_extend:GPR
> -	     (match_operand:SHORT 1 "nonimmediate_operand" "r,m,k")))]
> +	     (match_operand:HI 1 "nonimmediate_operand" "r,m,k")))]
>     ""
>     "@
> -   bstrpick.w\t%0,%1,<SHORT:7_or_15>,0
> -   ld.<SHORT:size>u\t%0,%1
> -   ldx.<SHORT:size>u\t%0,%1"
> +   bstrpick.w\t%0,%1,15,0
> +   ld.hu\t%0,%1
> +   ldx.hu\t%0,%1"
>     [(set_attr "move_type" "pick_ins,load,load")
>      (set_attr "mode" "<GPR:MODE>")])
>   
> -(define_insn "zero_extendqihi2"
> -  [(set (match_operand:HI 0 "register_operand" "=r,r,r")
> -	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,k,m")))]
> +(define_insn "zero_extendqi<HWD:mode>2"
> +  [(set (match_operand:HWD 0 "register_operand" "=r,r,r")
> +	(zero_extend:HWD (match_operand:QI 1 "nonimmediate_operand" "r,k,m")))]
>     ""
>     "@
>      andi\t%0,%1,0xff
>      ldx.bu\t%0,%1
>      ld.bu\t%0,%1"
>     [(set_attr "move_type" "andi,load,load")
> -   (set_attr "mode" "HI")])
> +   (set_attr "mode" "<HWD:MODE>")])
>   
>   ;; Combiner patterns to optimize truncate/zero_extend combinations.
>   
> -(define_insn "*zero_extend<GPR:mode>_trunc<SHORT:mode>"
> +(define_insn "*zero_extend<GPR:mode>_trunchi"
>     [(set (match_operand:GPR 0 "register_operand" "=r")
>   	(zero_extend:GPR
> -	    (truncate:SHORT (match_operand:DI 1 "register_operand" "r"))))]
> +	    (truncate:HI (match_operand:DI 1 "register_operand" "r"))))]
>     "TARGET_64BIT"
> -  "bstrpick.w\t%0,%1,<SHORT:7_or_15>,0"
> +  "bstrpick.w\t%0,%1,15,0"
>     [(set_attr "move_type" "pick_ins")
>      (set_attr "mode" "<GPR:MODE>")])
>   
> -(define_insn "*zero_extendhi_truncqi"
> -  [(set (match_operand:HI 0 "register_operand" "=r")
> -	(zero_extend:HI
> +(define_insn "*zero_extend<HWD:mode>_truncqi"
> +  [(set (match_operand:HWD 0 "register_operand" "=r")
> +	(zero_extend:HWD
>   	    (truncate:QI (match_operand:DI 1 "register_operand" "r"))))]
>     "TARGET_64BIT"
>     "andi\t%0,%1,0xff"
>     [(set_attr "alu_type" "and")
> -   (set_attr "mode" "HI")])
> +   (set_attr "mode" "<HWD:MODE>")])
>   
>   ;;
>   ;;  ....................
> diff --git a/gcc/testsuite/gcc.target/loongarch/zeroextend-qi.c b/gcc/testsuite/gcc.target/loongarch/zeroextend-qi.c
> new file mode 100644
> index 00000000000..1da8cdad2ca
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/loongarch/zeroextend-qi.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { scan-assembler "andi" } } */
> +
> +#include <stdint.h>
> +
> +uint8_t
> +foo (uint64_t a, uint8_t b)
> +{
> +  return a + b;
> +}
  

Patch

diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index d1f5b94f5d6..843dee77a60 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -397,6 +397,9 @@ 
 ;; Likewise the 64-bit truncate-and-shift patterns.
 (define_mode_iterator SUBDI [QI HI SI])
 
+;; Scalar fixed point modes, excluding QI.
+(define_mode_iterator HWD [HI SI (DI "TARGET_64BIT")])
+
 ;; Iterator for scalar fixed point modes.
 (define_mode_iterator QHWD [QI HI SI (DI "TARGET_64BIT")])
 
@@ -1659,48 +1662,48 @@ 
   [(set_attr "move_type" "arith,load,load,load")
    (set_attr "mode" "DI")])
 
-(define_insn "zero_extend<SHORT:mode><GPR:mode>2"
+(define_insn "zero_extendhi<GPR:mode>2"
   [(set (match_operand:GPR 0 "register_operand" "=r,r,r")
 	(zero_extend:GPR
-	     (match_operand:SHORT 1 "nonimmediate_operand" "r,m,k")))]
+	     (match_operand:HI 1 "nonimmediate_operand" "r,m,k")))]
   ""
   "@
-   bstrpick.w\t%0,%1,<SHORT:7_or_15>,0
-   ld.<SHORT:size>u\t%0,%1
-   ldx.<SHORT:size>u\t%0,%1"
+   bstrpick.w\t%0,%1,15,0
+   ld.hu\t%0,%1
+   ldx.hu\t%0,%1"
   [(set_attr "move_type" "pick_ins,load,load")
    (set_attr "mode" "<GPR:MODE>")])
 
-(define_insn "zero_extendqihi2"
-  [(set (match_operand:HI 0 "register_operand" "=r,r,r")
-	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,k,m")))]
+(define_insn "zero_extendqi<HWD:mode>2"
+  [(set (match_operand:HWD 0 "register_operand" "=r,r,r")
+	(zero_extend:HWD (match_operand:QI 1 "nonimmediate_operand" "r,k,m")))]
   ""
   "@
    andi\t%0,%1,0xff
    ldx.bu\t%0,%1
    ld.bu\t%0,%1"
   [(set_attr "move_type" "andi,load,load")
-   (set_attr "mode" "HI")])
+   (set_attr "mode" "<HWD:MODE>")])
 
 ;; Combiner patterns to optimize truncate/zero_extend combinations.
 
-(define_insn "*zero_extend<GPR:mode>_trunc<SHORT:mode>"
+(define_insn "*zero_extend<GPR:mode>_trunchi"
   [(set (match_operand:GPR 0 "register_operand" "=r")
 	(zero_extend:GPR
-	    (truncate:SHORT (match_operand:DI 1 "register_operand" "r"))))]
+	    (truncate:HI (match_operand:DI 1 "register_operand" "r"))))]
   "TARGET_64BIT"
-  "bstrpick.w\t%0,%1,<SHORT:7_or_15>,0"
+  "bstrpick.w\t%0,%1,15,0"
   [(set_attr "move_type" "pick_ins")
    (set_attr "mode" "<GPR:MODE>")])
 
-(define_insn "*zero_extendhi_truncqi"
-  [(set (match_operand:HI 0 "register_operand" "=r")
-	(zero_extend:HI
+(define_insn "*zero_extend<HWD:mode>_truncqi"
+  [(set (match_operand:HWD 0 "register_operand" "=r")
+	(zero_extend:HWD
 	    (truncate:QI (match_operand:DI 1 "register_operand" "r"))))]
   "TARGET_64BIT"
   "andi\t%0,%1,0xff"
   [(set_attr "alu_type" "and")
-   (set_attr "mode" "HI")])
+   (set_attr "mode" "<HWD:MODE>")])
 
 ;;
 ;;  ....................
diff --git a/gcc/testsuite/gcc.target/loongarch/zeroextend-qi.c b/gcc/testsuite/gcc.target/loongarch/zeroextend-qi.c
new file mode 100644
index 00000000000..1da8cdad2ca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/zeroextend-qi.c
@@ -0,0 +1,11 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler "andi" } } */
+
+#include <stdint.h>
+
+uint8_t
+foo (uint64_t a, uint8_t b)
+{
+  return a + b;
+}