LoongArch: Optimize zero_extendqisi2 and zero_extendqidi2 patterns
Checks
Commit Message
For zero_extendqisi2 and zero_extendqidi2, use andi instead of bstrpick.w,
because andi is 6 times faster than bstrpick.w.
gcc/ChangeLog:
* config/loongarch/loongarch.md:
(zero_extend<SHORT:mode><GPR:mode>2): Rename to ..
(zero_extendhi<GPR:mode>2): .. this, use hi.
(zero_extendqihi2): Rename to ..
(zero_extendqi<HWD:mode>2): .. this, and extend to HWD.
(*zero_extend<GPR:mode>_trunc<SHORT:mode>): Rename to ..
(*zero_extend<GPR:mode>_trunchi): .. this, use hi.
(*zero_extendhi_truncqi): Rename to ..
(*zero_extend<HWD:mode>_truncqi): .. this, and extend to HWD.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/zeroextend-qi.c: New test.
Comments
Hi, Jiahao:
I tested the two instructions (andi and bstrpick.w) here, and their
latencies are the same on both the 3A5000 and the 3A6000.
The performance claim in the commit message needs to be confirmed again.
在 2024/1/5 下午3:37, Jiahao Xu 写道:
> For zero_extendqisi2 and zero_extendqidi2, use andi instead of bstrpick.w,
> because andi is 6 times faster than bstrpick.w.
>
> gcc/ChangeLog:
>
> * config/loongarch/loongarch.md:
> (zero_extend<SHORT:mode><GPR:mode>2): Rename to ..
> (zero_extendhi<GPR:mode>2): .. this, use hi.
> (zero_extendqihi2): Rename to ..
> (zero_extendqi<HWD:mode>2): .. this, and extend to HWD.
> (*zero_extend<GPR:mode>_trunc<SHORT:mode>): Rename to ..
> (*zero_extend<GPR:mode>_trunchi): .. this, use hi.
> (*zero_extendhi_truncqi): Rename to ..
> (*zero_extend<HWD:mode>_truncqi): .. this, and extend to HWD.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/loongarch/zeroextend-qi.c: New test.
>
> diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
> index d1f5b94f5d6..843dee77a60 100644
> --- a/gcc/config/loongarch/loongarch.md
> +++ b/gcc/config/loongarch/loongarch.md
> @@ -397,6 +397,9 @@
> ;; Likewise the 64-bit truncate-and-shift patterns.
> (define_mode_iterator SUBDI [QI HI SI])
>
> +;; Scalar fixed point modes, excluding QI.
> +(define_mode_iterator HWD [HI SI (DI "TARGET_64BIT")])
> +
> ;; Iterator for scalar fixed point modes.
> (define_mode_iterator QHWD [QI HI SI (DI "TARGET_64BIT")])
>
> @@ -1659,48 +1662,48 @@
> [(set_attr "move_type" "arith,load,load,load")
> (set_attr "mode" "DI")])
>
> -(define_insn "zero_extend<SHORT:mode><GPR:mode>2"
> +(define_insn "zero_extendhi<GPR:mode>2"
> [(set (match_operand:GPR 0 "register_operand" "=r,r,r")
> (zero_extend:GPR
> - (match_operand:SHORT 1 "nonimmediate_operand" "r,m,k")))]
> + (match_operand:HI 1 "nonimmediate_operand" "r,m,k")))]
> ""
> "@
> - bstrpick.w\t%0,%1,<SHORT:7_or_15>,0
> - ld.<SHORT:size>u\t%0,%1
> - ldx.<SHORT:size>u\t%0,%1"
> + bstrpick.w\t%0,%1,15,0
> + ld.hu\t%0,%1
> + ldx.hu\t%0,%1"
> [(set_attr "move_type" "pick_ins,load,load")
> (set_attr "mode" "<GPR:MODE>")])
>
> -(define_insn "zero_extendqihi2"
> - [(set (match_operand:HI 0 "register_operand" "=r,r,r")
> - (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,k,m")))]
> +(define_insn "zero_extendqi<HWD:mode>2"
> + [(set (match_operand:HWD 0 "register_operand" "=r,r,r")
> + (zero_extend:HWD (match_operand:QI 1 "nonimmediate_operand" "r,k,m")))]
> ""
> "@
> andi\t%0,%1,0xff
> ldx.bu\t%0,%1
> ld.bu\t%0,%1"
> [(set_attr "move_type" "andi,load,load")
> - (set_attr "mode" "HI")])
> + (set_attr "mode" "<HWD:MODE>")])
>
> ;; Combiner patterns to optimize truncate/zero_extend combinations.
>
> -(define_insn "*zero_extend<GPR:mode>_trunc<SHORT:mode>"
> +(define_insn "*zero_extend<GPR:mode>_trunchi"
> [(set (match_operand:GPR 0 "register_operand" "=r")
> (zero_extend:GPR
> - (truncate:SHORT (match_operand:DI 1 "register_operand" "r"))))]
> + (truncate:HI (match_operand:DI 1 "register_operand" "r"))))]
> "TARGET_64BIT"
> - "bstrpick.w\t%0,%1,<SHORT:7_or_15>,0"
> + "bstrpick.w\t%0,%1,15,0"
> [(set_attr "move_type" "pick_ins")
> (set_attr "mode" "<GPR:MODE>")])
>
> -(define_insn "*zero_extendhi_truncqi"
> - [(set (match_operand:HI 0 "register_operand" "=r")
> - (zero_extend:HI
> +(define_insn "*zero_extend<HWD:mode>_truncqi"
> + [(set (match_operand:HWD 0 "register_operand" "=r")
> + (zero_extend:HWD
> (truncate:QI (match_operand:DI 1 "register_operand" "r"))))]
> "TARGET_64BIT"
> "andi\t%0,%1,0xff"
> [(set_attr "alu_type" "and")
> - (set_attr "mode" "HI")])
> + (set_attr "mode" "<HWD:MODE>")])
>
> ;;
> ;; ....................
> diff --git a/gcc/testsuite/gcc.target/loongarch/zeroextend-qi.c b/gcc/testsuite/gcc.target/loongarch/zeroextend-qi.c
> new file mode 100644
> index 00000000000..1da8cdad2ca
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/loongarch/zeroextend-qi.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { scan-assembler "andi" } } */
> +
> +#include <stdint.h>
> +
> +uint8_t
> +foo (uint64_t a, uint8_t b)
> +{
> + return a + b;
> +}
@@ -397,6 +397,9 @@
;; Likewise the 64-bit truncate-and-shift patterns.
(define_mode_iterator SUBDI [QI HI SI])
+;; Scalar fixed point modes, excluding QI.
+(define_mode_iterator HWD [HI SI (DI "TARGET_64BIT")])
+
;; Iterator for scalar fixed point modes.
(define_mode_iterator QHWD [QI HI SI (DI "TARGET_64BIT")])
@@ -1659,48 +1662,48 @@
[(set_attr "move_type" "arith,load,load,load")
(set_attr "mode" "DI")])
-(define_insn "zero_extend<SHORT:mode><GPR:mode>2"
+(define_insn "zero_extendhi<GPR:mode>2"
[(set (match_operand:GPR 0 "register_operand" "=r,r,r")
(zero_extend:GPR
- (match_operand:SHORT 1 "nonimmediate_operand" "r,m,k")))]
+ (match_operand:HI 1 "nonimmediate_operand" "r,m,k")))]
""
"@
- bstrpick.w\t%0,%1,<SHORT:7_or_15>,0
- ld.<SHORT:size>u\t%0,%1
- ldx.<SHORT:size>u\t%0,%1"
+ bstrpick.w\t%0,%1,15,0
+ ld.hu\t%0,%1
+ ldx.hu\t%0,%1"
[(set_attr "move_type" "pick_ins,load,load")
(set_attr "mode" "<GPR:MODE>")])
-(define_insn "zero_extendqihi2"
- [(set (match_operand:HI 0 "register_operand" "=r,r,r")
- (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,k,m")))]
+(define_insn "zero_extendqi<HWD:mode>2"
+ [(set (match_operand:HWD 0 "register_operand" "=r,r,r")
+ (zero_extend:HWD (match_operand:QI 1 "nonimmediate_operand" "r,k,m")))]
""
"@
andi\t%0,%1,0xff
ldx.bu\t%0,%1
ld.bu\t%0,%1"
[(set_attr "move_type" "andi,load,load")
- (set_attr "mode" "HI")])
+ (set_attr "mode" "<HWD:MODE>")])
;; Combiner patterns to optimize truncate/zero_extend combinations.
-(define_insn "*zero_extend<GPR:mode>_trunc<SHORT:mode>"
+(define_insn "*zero_extend<GPR:mode>_trunchi"
[(set (match_operand:GPR 0 "register_operand" "=r")
(zero_extend:GPR
- (truncate:SHORT (match_operand:DI 1 "register_operand" "r"))))]
+ (truncate:HI (match_operand:DI 1 "register_operand" "r"))))]
"TARGET_64BIT"
- "bstrpick.w\t%0,%1,<SHORT:7_or_15>,0"
+ "bstrpick.w\t%0,%1,15,0"
[(set_attr "move_type" "pick_ins")
(set_attr "mode" "<GPR:MODE>")])
-(define_insn "*zero_extendhi_truncqi"
- [(set (match_operand:HI 0 "register_operand" "=r")
- (zero_extend:HI
+(define_insn "*zero_extend<HWD:mode>_truncqi"
+ [(set (match_operand:HWD 0 "register_operand" "=r")
+ (zero_extend:HWD
(truncate:QI (match_operand:DI 1 "register_operand" "r"))))]
"TARGET_64BIT"
"andi\t%0,%1,0xff"
[(set_attr "alu_type" "and")
- (set_attr "mode" "HI")])
+ (set_attr "mode" "<HWD:MODE>")])
;;
;; ....................
new file mode 100644
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler "andi" } } */
+
+#include <stdint.h>
+
+uint8_t
+foo (uint64_t a, uint8_t b)
+{
+ return a + b;
+}