LoongArch: Generate bytepick.[wd] for suitable bit operation pattern
Checks
Commit Message
We can use bytepick.[wd] for
a << (8 * x) | b >> (8 * (sizeof(a) - x))
when a and b are uint32_t or uint64_t. This is useful in some cases,
for example:
https://sourceware.org/pipermail/libc-alpha/2023-February/145203.html
Bootstrapped and regtested on loongarch64-linux-gnu.
Ok for trunk (now or GCC 14 stage 1)?
gcc/ChangeLog:
* config/loongarch/loongarch.md (bytepick_w_ashift_amount):
New define_int_iterator.
(bytepick_d_ashift_amount): Likewise.
(bytepick_imm): New define_int_attr.
(bytepick_w_lshiftrt_amount): Likewise.
(bytepick_d_lshiftrt_amount): Likewise.
(bytepick_w_<bytepick_imm>): New define_insn template.
(bytepick_w_<bytepick_imm>_extend): Likewise.
(bytepick_d_<bytepick_imm>): Likewise.
(bytepick_w): Remove unused define_insn.
(bytepick_d): Likewise.
(UNSPEC_BYTEPICK_W): Remove unused unspec.
(UNSPEC_BYTEPICK_D): Likewise.
* config/loongarch/predicates.md (const_0_to_3_operand):
Remove unused define_predicate.
(const_0_to_7_operand): Likewise.
gcc/testsuite/ChangeLog:
* g++.target/loongarch/bytepick.C: New test.
---
gcc/config/loongarch/loongarch.md | 60 ++++++++++++++-----
gcc/config/loongarch/predicates.md | 8 ---
gcc/testsuite/g++.target/loongarch/bytepick.C | 32 ++++++++++
3 files changed, 77 insertions(+), 23 deletions(-)
create mode 100644 gcc/testsuite/g++.target/loongarch/bytepick.C
Comments
在 2023/2/4 上午1:50, Xi Ruoyao 写道:
> We can use bytepick.[wd] for
>
> a << (8 * x) | b >> (8 * (sizeof(a) - x))
>
> when a and b are uint32_t or uint64_t. This is useful in some cases,
> for example:
> https://sourceware.org/pipermail/libc-alpha/2023-February/145203.html
>
> Bootstrapped and regtested on loongarch64-linux-gnu.
> Ok for trunk (now or GCC 14 stage 1)?
LGTM!
I think this change is small enough to be merged into trunk now.
Thanks!
> gcc/ChangeLog:
>
> * config/loongarch/loongarch.md (bytepick_w_ashift_amount):
> New define_int_iterator.
> (bytepick_d_ashift_amount): Likewise.
> (bytepick_imm): New define_int_attr.
> (bytepick_w_lshiftrt_amount): Likewise.
> (bytepick_d_lshiftrt_amount): Likewise.
> (bytepick_w_<bytepick_imm>): New define_insn template.
> (bytepick_w_<bytepick_imm>_extend): Likewise.
> (bytepick_d_<bytepick_imm>): Likewise.
> (bytepick_w): Remove unused define_insn.
> (bytepick_d): Likewise.
> (UNSPEC_BYTEPICK_W): Remove unused unspec.
> (UNSPEC_BYTEPICK_D): Likewise.
> * config/loongarch/predicates.md (const_0_to_3_operand):
> Remove unused define_predicate.
> (const_0_to_7_operand): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> * g++.target/loongarch/bytepick.C: New test.
> ---
> gcc/config/loongarch/loongarch.md | 60 ++++++++++++++-----
> gcc/config/loongarch/predicates.md | 8 ---
> gcc/testsuite/g++.target/loongarch/bytepick.C | 32 ++++++++++
> 3 files changed, 77 insertions(+), 23 deletions(-)
> create mode 100644 gcc/testsuite/g++.target/loongarch/bytepick.C
>
> diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
> index 0884ec09dfb..3509c3c21c1 100644
> --- a/gcc/config/loongarch/loongarch.md
> +++ b/gcc/config/loongarch/loongarch.md
> @@ -48,8 +48,6 @@ (define_c_enum "unspec" [
> UNSPEC_EH_RETURN
>
> ;; Bit operation
> - UNSPEC_BYTEPICK_W
> - UNSPEC_BYTEPICK_D
> UNSPEC_BITREV_4B
> UNSPEC_BITREV_8B
>
> @@ -544,6 +542,27 @@ (define_int_attr lrint_allow_inexact [(UNSPEC_FTINT "1")
> (UNSPEC_FTINTRM "0")
> (UNSPEC_FTINTRP "0")])
>
> +;; Iterators and attributes for bytepick.[wd]
> +(define_int_iterator bytepick_w_ashift_amount [8 16 24])
> +(define_int_attr bytepick_w_lshiftrt_amount [(8 "24")
> + (16 "16")
> + (24 "8")])
> +(define_int_iterator bytepick_d_ashift_amount [8 16 24 32 40 48 56])
> +(define_int_attr bytepick_d_lshiftrt_amount [(8 "56")
> + (16 "48")
> + (24 "40")
> + (32 "32")
> + (40 "24")
> + (48 "16")
> + (56 "8")])
> +(define_int_attr bytepick_imm [(8 "1")
> + (16 "2")
> + (24 "3")
> + (32 "4")
> + (40 "5")
> + (48 "6")
> + (56 "7")])
> +
> ;;
> ;; ....................
> ;;
> @@ -3364,24 +3383,35 @@ (define_insn "fclass_<fmt>"
> [(set_attr "type" "unknown")
> (set_attr "mode" "<MODE>")])
>
> -(define_insn "bytepick_w"
> +(define_insn "bytepick_w_<bytepick_imm>"
> [(set (match_operand:SI 0 "register_operand" "=r")
> - (unspec:SI [(match_operand:SI 1 "register_operand" "r")
> - (match_operand:SI 2 "register_operand" "r")
> - (match_operand:SI 3 "const_0_to_3_operand" "n")]
> - UNSPEC_BYTEPICK_W))]
> + (ior:SI (lshiftrt (match_operand:SI 1 "register_operand" "r")
> + (const_int <bytepick_w_lshiftrt_amount>))
> + (ashift (match_operand:SI 2 "register_operand" "r")
> + (const_int bytepick_w_ashift_amount))))]
> ""
> - "bytepick.w\t%0,%1,%2,%z3"
> + "bytepick.w\t%0,%1,%2,<bytepick_imm>"
> [(set_attr "mode" "SI")])
>
> -(define_insn "bytepick_d"
> +(define_insn "bytepick_w_<bytepick_imm>_extend"
> [(set (match_operand:DI 0 "register_operand" "=r")
> - (unspec:DI [(match_operand:DI 1 "register_operand" "r")
> - (match_operand:DI 2 "register_operand" "r")
> - (match_operand:DI 3 "const_0_to_7_operand" "n")]
> - UNSPEC_BYTEPICK_D))]
> - ""
> - "bytepick.d\t%0,%1,%2,%z3"
> + (sign_extend:DI
> + (ior:SI (lshiftrt (match_operand:SI 1 "register_operand" "r")
> + (const_int <bytepick_w_lshiftrt_amount>))
> + (ashift (match_operand:SI 2 "register_operand" "r")
> + (const_int bytepick_w_ashift_amount)))))]
> + "TARGET_64BIT"
> + "bytepick.w\t%0,%1,%2,<bytepick_imm>"
> + [(set_attr "mode" "SI")])
> +
> +(define_insn "bytepick_d_<bytepick_imm>"
> + [(set (match_operand:DI 0 "register_operand" "=r")
> + (ior:DI (lshiftrt (match_operand:DI 1 "register_operand" "r")
> + (const_int <bytepick_d_lshiftrt_amount>))
> + (ashift (match_operand:DI 2 "register_operand" "r")
> + (const_int bytepick_d_ashift_amount))))]
> + "TARGET_64BIT"
> + "bytepick.d\t%0,%1,%2,<bytepick_imm>"
> [(set_attr "mode" "DI")])
>
> (define_insn "bitrev_4b"
> diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
> index de0a363e2e4..95140280f1e 100644
> --- a/gcc/config/loongarch/predicates.md
> +++ b/gcc/config/loongarch/predicates.md
> @@ -91,14 +91,6 @@ (define_predicate "reg_or_1_operand"
> (ior (match_operand 0 "const_1_operand")
> (match_operand 0 "register_operand")))
>
> -(define_predicate "const_0_to_3_operand"
> - (and (match_code "const_int")
> - (match_test "IN_RANGE (INTVAL (op), 0, 3)")))
> -
> -(define_predicate "const_0_to_7_operand"
> - (and (match_code "const_int")
> - (match_test "IN_RANGE (INTVAL (op), 0, 7)")))
> -
> (define_predicate "lu52i_mask_operand"
> (and (match_code "const_int")
> (match_test "UINTVAL (op) == 0xfffffffffffff")))
> diff --git a/gcc/testsuite/g++.target/loongarch/bytepick.C b/gcc/testsuite/g++.target/loongarch/bytepick.C
> new file mode 100644
> index 00000000000..a39e2fa65b7
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/loongarch/bytepick.C
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mabi=lp64d" } */
> +/* { dg-final { scan-assembler-times "bytepick.w\t\\\$r4,\\\$r5,\\\$r4" 3 } } */
> +/* { dg-final { scan-assembler-times "bytepick.d\t\\\$r4,\\\$r5,\\\$r4" 7 } } */
> +/* { dg-final { scan-assembler-not "slli.w" } } */
> +
> +template <class T, int offs>
> +T
> +merge (T a, T b)
> +{
> + return a << offs | b >> (8 * sizeof (T) - offs);
> +}
> +
> +using u32 = __UINT32_TYPE__;
> +using u64 = __UINT64_TYPE__;
> +using i64 = __INT64_TYPE__;
> +
> +template u32 merge<u32, 8> (u32, u32);
> +template u32 merge<u32, 16> (u32, u32);
> +template u32 merge<u32, 24> (u32, u32);
> +
> +template u64 merge<u64, 8> (u64, u64);
> +template u64 merge<u64, 16> (u64, u64);
> +template u64 merge<u64, 24> (u64, u64);
> +template u64 merge<u64, 32> (u64, u64);
> +template u64 merge<u64, 40> (u64, u64);
> +template u64 merge<u64, 48> (u64, u64);
> +template u64 merge<u64, 56> (u64, u64);
> +
> +/* We cannot use bytepick for the following cases: the i64 right shift is arithmetic rather than logical, and 42 is not a multiple of 8.  */
> +template i64 merge<i64, 8> (i64, i64);
> +template u64 merge<u64, 42> (u64, u64);
On Mon, 2023-02-06 at 21:07 +0800, Lulu Cheng wrote:
>
> 在 2023/2/4 上午1:50, Xi Ruoyao 写道:
> > We can use bytepick.[wd] for
> >
> > a << (8 * x) | b >> (8 * (sizeof(a) - x))
> >
> > when a and b are uint32_t or uint64_t. This is useful in some cases,
> > for example:
> > https://sourceware.org/pipermail/libc-alpha/2023-February/145203.html
> >
> > Bootstrapped and regtested on loongarch64-linux-gnu.
> > Ok for trunk (now or GCC 14 stage 1)?
>
> LGTM!
>
> I think this change is small enough to be merged into trunk now.
Pushed r13-5710.
@@ -48,8 +48,6 @@ (define_c_enum "unspec" [
UNSPEC_EH_RETURN
;; Bit operation
- UNSPEC_BYTEPICK_W
- UNSPEC_BYTEPICK_D
UNSPEC_BITREV_4B
UNSPEC_BITREV_8B
@@ -544,6 +542,27 @@ (define_int_attr lrint_allow_inexact [(UNSPEC_FTINT "1")
(UNSPEC_FTINTRM "0")
(UNSPEC_FTINTRP "0")])
+;; Iterators and attributes for bytepick.[wd]
+(define_int_iterator bytepick_w_ashift_amount [8 16 24])
+(define_int_attr bytepick_w_lshiftrt_amount [(8 "24")
+ (16 "16")
+ (24 "8")])
+(define_int_iterator bytepick_d_ashift_amount [8 16 24 32 40 48 56])
+(define_int_attr bytepick_d_lshiftrt_amount [(8 "56")
+ (16 "48")
+ (24 "40")
+ (32 "32")
+ (40 "24")
+ (48 "16")
+ (56 "8")])
+(define_int_attr bytepick_imm [(8 "1")
+ (16 "2")
+ (24 "3")
+ (32 "4")
+ (40 "5")
+ (48 "6")
+ (56 "7")])
+
;;
;; ....................
;;
@@ -3364,24 +3383,35 @@ (define_insn "fclass_<fmt>"
[(set_attr "type" "unknown")
(set_attr "mode" "<MODE>")])
-(define_insn "bytepick_w"
+(define_insn "bytepick_w_<bytepick_imm>"
[(set (match_operand:SI 0 "register_operand" "=r")
- (unspec:SI [(match_operand:SI 1 "register_operand" "r")
- (match_operand:SI 2 "register_operand" "r")
- (match_operand:SI 3 "const_0_to_3_operand" "n")]
- UNSPEC_BYTEPICK_W))]
+ (ior:SI (lshiftrt (match_operand:SI 1 "register_operand" "r")
+ (const_int <bytepick_w_lshiftrt_amount>))
+ (ashift (match_operand:SI 2 "register_operand" "r")
+ (const_int bytepick_w_ashift_amount))))]
""
- "bytepick.w\t%0,%1,%2,%z3"
+ "bytepick.w\t%0,%1,%2,<bytepick_imm>"
[(set_attr "mode" "SI")])
-(define_insn "bytepick_d"
+(define_insn "bytepick_w_<bytepick_imm>_extend"
[(set (match_operand:DI 0 "register_operand" "=r")
- (unspec:DI [(match_operand:DI 1 "register_operand" "r")
- (match_operand:DI 2 "register_operand" "r")
- (match_operand:DI 3 "const_0_to_7_operand" "n")]
- UNSPEC_BYTEPICK_D))]
- ""
- "bytepick.d\t%0,%1,%2,%z3"
+ (sign_extend:DI
+ (ior:SI (lshiftrt (match_operand:SI 1 "register_operand" "r")
+ (const_int <bytepick_w_lshiftrt_amount>))
+ (ashift (match_operand:SI 2 "register_operand" "r")
+ (const_int bytepick_w_ashift_amount)))))]
+ "TARGET_64BIT"
+ "bytepick.w\t%0,%1,%2,<bytepick_imm>"
+ [(set_attr "mode" "SI")])
+
+(define_insn "bytepick_d_<bytepick_imm>"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ior:DI (lshiftrt (match_operand:DI 1 "register_operand" "r")
+ (const_int <bytepick_d_lshiftrt_amount>))
+ (ashift (match_operand:DI 2 "register_operand" "r")
+ (const_int bytepick_d_ashift_amount))))]
+ "TARGET_64BIT"
+ "bytepick.d\t%0,%1,%2,<bytepick_imm>"
[(set_attr "mode" "DI")])
(define_insn "bitrev_4b"
@@ -91,14 +91,6 @@ (define_predicate "reg_or_1_operand"
(ior (match_operand 0 "const_1_operand")
(match_operand 0 "register_operand")))
-(define_predicate "const_0_to_3_operand"
- (and (match_code "const_int")
- (match_test "IN_RANGE (INTVAL (op), 0, 3)")))
-
-(define_predicate "const_0_to_7_operand"
- (and (match_code "const_int")
- (match_test "IN_RANGE (INTVAL (op), 0, 7)")))
-
(define_predicate "lu52i_mask_operand"
(and (match_code "const_int")
(match_test "UINTVAL (op) == 0xfffffffffffff")))
new file mode 100644
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mabi=lp64d" } */
+/* { dg-final { scan-assembler-times "bytepick.w\t\\\$r4,\\\$r5,\\\$r4" 3 } } */
+/* { dg-final { scan-assembler-times "bytepick.d\t\\\$r4,\\\$r5,\\\$r4" 7 } } */
+/* { dg-final { scan-assembler-not "slli.w" } } */
+
+template <class T, int offs>
+T
+merge (T a, T b)
+{
+ return a << offs | b >> (8 * sizeof (T) - offs);
+}
+
+using u32 = __UINT32_TYPE__;
+using u64 = __UINT64_TYPE__;
+using i64 = __INT64_TYPE__;
+
+template u32 merge<u32, 8> (u32, u32);
+template u32 merge<u32, 16> (u32, u32);
+template u32 merge<u32, 24> (u32, u32);
+
+template u64 merge<u64, 8> (u64, u64);
+template u64 merge<u64, 16> (u64, u64);
+template u64 merge<u64, 24> (u64, u64);
+template u64 merge<u64, 32> (u64, u64);
+template u64 merge<u64, 40> (u64, u64);
+template u64 merge<u64, 48> (u64, u64);
+template u64 merge<u64, 56> (u64, u64);
+
+/* We cannot use bytepick for the following cases: the i64 right shift is arithmetic rather than logical, and 42 is not a multiple of 8.  */
+template i64 merge<i64, 8> (i64, i64);
+template u64 merge<u64, 42> (u64, u64);