[ARC] Improved SImode shifts and rotates with -mswap.

Message ID 005001da08e0$f42fa2b0$dc8ee810$@nextmovesoftware.com
State Unresolved
Headers
Series [ARC] Improved SImode shifts and rotates with -mswap. |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

Roger Sayle Oct. 27, 2023, 2:22 p.m. UTC
  This patch improves the code generated by the ARC back-end for CPUs
without a barrel shifter but with -mswap.  The -mswap option provides
a SWAP instruction that implements SImode rotations by 16, but also
logical shift instructions (left and right) by 16 bits.  Clearly these
are also useful building blocks for implementing shifts by 17, 18, etc.
which would otherwise require a loop.

As a representative example:
int shl20 (int x) { return x << 20; }

GCC with -O2 -mcpu=em -mswap would previously generate:

shl20:  mov     lp_count,10
        lp      2f
        add     r0,r0,r0
        add     r0,r0,r0
2:      # end single insn loop
        j_s     [blink]

with this patch we now generate:

shl20:  mov_s   r2,0    ;3
        lsl16   r0,r0
        add3    r0,r2,r0
        j_s.d   [blink]
        asl_s r0,r0

Although both are four instructions (excluding the j_s),
the original takes ~22 cycles, and replacement ~4 cycles.


Tested with a cross-compiler to arc-linux hosted on x86_64,
with no new (compile-only) regressions from make -k check.
Ok for mainline if this passes Claudiu's nightly testing?


2023-10-27  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
        * config/arc/arc.cc (arc_split_ashl): Use lsl16 on TARGET_SWAP.
        (arc_split_ashr): Use swap and sign-extend on TARGET_SWAP.
        (arc_split_lshr): Use lsr16 on TARGET_SWAP.
        (arc_split_rotl): Use swap on TARGET_SWAP.
        (arc_split_rotr): Likewise.
        * config/arc/arc.md (ANY_ROTATE): New code iterator.
        (<ANY_ROTATE>si2_cnt16): New define_insn for alternate form of
        swap instruction on TARGET_SWAP.
        (ashlsi2_cnt16): Rename from *ashlsi16_cnt16 and move earlier.
        (lshrsi2_cnt16): New define_insn for LSR16 instruction.
        (*ashlsi2_cnt16): See above.

gcc/testsuite/ChangeLog
        * gcc.target/arc/lsl16-1.c: New test case.
        * gcc.target/arc/lsr16-1.c: Likewise.
        * gcc.target/arc/swap-1.c: Likewise.
        * gcc.target/arc/swap-2.c: Likewise.


Thanks in advance,
Roger
--
  

Comments

Jeff Law Oct. 27, 2023, 2:37 p.m. UTC | #1
On 10/27/23 08:22, Roger Sayle wrote:
> 
> This patch improves the code generated by the ARC back-end for CPUs
> without a barrel shifter but with -mswap.  The -mswap option provides
> a SWAP instruction that implements SImode rotations by 16, but also
> logical shift instructions (left and right) by 16 bits.  Clearly these
> are also useful building blocks for implementing shifts by 17, 18, etc.
> which would otherwise require a loop.
> 
> As a representative example:
> int shl20 (int x) { return x << 20; }
> 
> GCC with -O2 -mcpu=em -mswap would previously generate:
> 
> shl20:  mov     lp_count,10
>          lp      2f
>          add     r0,r0,r0
>          add     r0,r0,r0
> 2:      # end single insn loop
>          j_s     [blink]
> 
> with this patch we now generate:
> 
> shl20:  mov_s   r2,0    ;3
>          lsl16   r0,r0
>          add3    r0,r2,r0
>          j_s.d   [blink]
>          asl_s r0,r0
> 
> Although both are four instructions (excluding the j_s),
> the original takes ~22 cycles, and replacement ~4 cycles.
> 
> 
> Tested with a cross-compiler to arc-linux hosted on x86_64,
> with no new (compile-only) regressions from make -k check.
> Ok for mainline if this passes Claudiu's nightly testing?
Not a review, just a comment.

The H8 has a ton of shift synthesis.  If you're looking for inspiration 
to improve this stuff further for ARC, it might be worth a looksie.


Jeff
  
Claudiu Zissulescu Ianculescu Oct. 30, 2023, 1:36 p.m. UTC | #2
Hi Roger,

+(define_insn "<insn>si2_cnt16"
+  [(set (match_operand:SI 0 "dest_reg_operand" "=w")

Please use "register_operand", and "r" constraint.

+    (ANY_ROTATE:SI (match_operand:SI 1 "register_operand" "c")

Please use "r" constraint instead of "c".

+               (const_int 16)))]
+  "TARGET_SWAP"
+  "swap\\t%0,%1"

Otherwise, it looks good to me. Please fix the above and proceed with
your commit.

Thank you for your contribution,
Claudiu
  

Patch

diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc
index 353ac69..e98692a 100644
--- a/gcc/config/arc/arc.cc
+++ b/gcc/config/arc/arc.cc
@@ -4256,6 +4256,17 @@  arc_split_ashl (rtx *operands)
 	    }
 	  return;
 	}
+      else if (n >= 16 && n <= 22 && TARGET_SWAP && TARGET_V2)
+	{
+	  emit_insn (gen_ashlsi2_cnt16 (operands[0], operands[1]));
+	  if (n > 16)
+	    {
+	      operands[1] = operands[0];
+	      operands[2] = GEN_INT (n - 16);
+	      arc_split_ashl (operands);
+	    }
+	  return;
+	}
       else if (n >= 29)
 	{
 	  if (n < 31)
@@ -4300,6 +4311,15 @@  arc_split_ashr (rtx *operands)
 	    emit_move_insn (operands[0], operands[1]);
 	  return;
 	}
+      else if (n >= 16 && n <= 18 && TARGET_SWAP)
+	{
+	  emit_insn (gen_rotrsi2_cnt16 (operands[0], operands[1]));
+	  emit_insn (gen_extendhisi2 (operands[0],
+				      gen_lowpart (HImode, operands[0])));
+	  while (--n >= 16)
+	    emit_insn (gen_ashrsi3_cnt1 (operands[0], operands[0]));
+	  return;
+	}
       else if (n == 30)
 	{
 	  rtx tmp = gen_reg_rtx (SImode);
@@ -4339,6 +4359,13 @@  arc_split_lshr (rtx *operands)
 	    emit_move_insn (operands[0], operands[1]);
 	  return;
 	}
+      else if (n >= 16 && n <= 19 && TARGET_SWAP && TARGET_V2)
+	{
+	  emit_insn (gen_lshrsi2_cnt16 (operands[0], operands[1]));
+	  while (--n >= 16)
+	    emit_insn (gen_lshrsi3_cnt1 (operands[0], operands[0]));
+	  return;
+	}
       else if (n == 30)
 	{
 	  rtx tmp = gen_reg_rtx (SImode);
@@ -4385,6 +4412,19 @@  arc_split_rotl (rtx *operands)
 	    emit_insn (gen_rotrsi3_cnt1 (operands[0], operands[0]));
 	  return;
 	}
+      else if (n >= 13 && n <= 16 && TARGET_SWAP)
+	{
+	  emit_insn (gen_rotlsi2_cnt16 (operands[0], operands[1]));
+	  while (++n <= 16)
+	    emit_insn (gen_rotrsi3_cnt1 (operands[0], operands[0]));
+	  return;
+	}
+      else if (n == 17 && TARGET_SWAP)
+	{
+	  emit_insn (gen_rotlsi2_cnt16 (operands[0], operands[1]));
+	  emit_insn (gen_rotlsi3_cnt1 (operands[0], operands[0]));
+	  return;
+	}
       else if (n >= 16 || n == 12 || n == 14)
 	{
 	  emit_insn (gen_rotrsi3_loop (operands[0], operands[1],
@@ -4415,6 +4455,19 @@  arc_split_rotr (rtx *operands)
 	    emit_move_insn (operands[0], operands[1]);
 	  return;
 	}
+      else if (n == 15 && TARGET_SWAP)
+	{
+	  emit_insn (gen_rotrsi2_cnt16 (operands[0], operands[1]));
+	  emit_insn (gen_rotlsi3_cnt1 (operands[0], operands[0]));
+	  return;
+	}
+      else if (n >= 16 && n <= 19 && TARGET_SWAP)
+	{
+	  emit_insn (gen_rotrsi2_cnt16 (operands[0], operands[1]));
+	  while (--n >= 16)
+	    emit_insn (gen_rotrsi3_cnt1 (operands[0], operands[0]));
+	  return;
+	}
       else if (n >= 30)
 	{
 	  emit_insn (gen_rotlsi3_cnt1 (operands[0], operands[1]));
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index ee43887..7d5a124 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -3353,6 +3353,7 @@  archs4x, archs4xd"
 
 ;; Shift instructions.
 
+(define_code_iterator ANY_ROTATE [rotate rotatert])
 (define_code_iterator ANY_SHIFT_ROTATE [ashift ashiftrt lshiftrt
 					rotate rotatert])
 
@@ -3415,6 +3416,37 @@  archs4x, archs4xd"
    (set_attr "predicable" "no,no,yes,no,no")
    (set_attr "cond" "nocond,canuse,canuse,nocond,nocond")])
 
+(define_insn "<insn>si2_cnt16"
+  [(set (match_operand:SI 0 "dest_reg_operand" "=w")
+	(ANY_ROTATE:SI (match_operand:SI 1 "register_operand" "c")
+		       (const_int 16)))]
+  "TARGET_SWAP"
+  "swap\\t%0,%1"
+  [(set_attr "length" "4")
+   (set_attr "type" "two_cycle_core")])
+
+(define_insn "ashlsi2_cnt16"
+  [(set (match_operand:SI 0 "register_operand"             "=r")
+	(ashift:SI (match_operand:SI 1 "nonmemory_operand" "rL")
+		   (const_int 16)))]
+  "TARGET_SWAP && TARGET_V2"
+  "lsl16\\t%0,%1"
+  [(set_attr "type" "shift")
+   (set_attr "iscompact" "false")
+   (set_attr "length" "4")
+   (set_attr "predicable" "no")])
+
+(define_insn "lshrsi2_cnt16"
+  [(set (match_operand:SI 0 "register_operand"               "=r")
+	(lshiftrt:SI (match_operand:SI 1 "nonmemory_operand" "rL")
+		     (const_int 16)))]
+  "TARGET_SWAP && TARGET_V2"
+  "lsr16\\t%0,%1"
+  [(set_attr "type" "shift")
+   (set_attr "iscompact" "false")
+   (set_attr "length" "4")
+   (set_attr "predicable" "no")])
+
 ;; Split asl dst,1,src into bset dst,0,src.
 (define_insn_and_split "*ashlsi3_1"
   [(set (match_operand:SI 0 "dest_reg_operand")
@@ -5929,17 +5961,6 @@  archs4x, archs4xd"
    (set_attr "length" "4")
    (set_attr "predicable" "no")])
 
-(define_insn "*ashlsi2_cnt16"
-  [(set (match_operand:SI 0 "register_operand"            "=r")
-	(ashift:SI (match_operand:SI 1 "nonmemory_operand" "rL")
-		   (const_int 16)))]
-  "TARGET_SWAP && TARGET_V2"
-  "lsl16\\t%0,%1"
-  [(set_attr "type" "shift")
-   (set_attr "iscompact" "false")
-   (set_attr "length" "4")
-   (set_attr "predicable" "no")])
-
 (define_insn "lshrsi3_cnt1"
   [(set (match_operand:SI 0 "dest_reg_operand"             "=q,w")
 	(lshiftrt:SI (match_operand:SI 1 "register_operand" "q,c")
diff --git a/gcc/testsuite/gcc.target/arc/lsl16-1.c b/gcc/testsuite/gcc.target/arc/lsl16-1.c
new file mode 100644
index 0000000..cbd0dae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/lsl16-1.c
@@ -0,0 +1,10 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=em -mswap" } */
+
+int foo(int x)
+{
+  return x << 16;
+}
+
+/* { dg-final { scan-assembler "lsl16\\s+r0,r0" } } */
+
diff --git a/gcc/testsuite/gcc.target/arc/lsr16-1.c b/gcc/testsuite/gcc.target/arc/lsr16-1.c
new file mode 100644
index 0000000..8ce5f13
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/lsr16-1.c
@@ -0,0 +1,10 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=em -mswap" } */
+
+unsigned int foo(unsigned int x)
+{
+  return x >> 16;
+}
+
+/* { dg-final { scan-assembler "lsr16\\s+r0,r0" } } */
+
diff --git a/gcc/testsuite/gcc.target/arc/swap-1.c b/gcc/testsuite/gcc.target/arc/swap-1.c
new file mode 100644
index 0000000..71afc75
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/swap-1.c
@@ -0,0 +1,9 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=em -mswap" } */
+
+int foo(int x)
+{
+  return ((unsigned int)x >> 16) | (x << 16);
+}
+
+/* { dg-final { scan-assembler "swap\\s+r0,r0" } } */
diff --git a/gcc/testsuite/gcc.target/arc/swap-2.c b/gcc/testsuite/gcc.target/arc/swap-2.c
new file mode 100644
index 0000000..bf12392
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/swap-2.c
@@ -0,0 +1,11 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=em -mswap" } */
+
+int foo(int x)
+{
+  return x >> 16;
+}
+
+/* { dg-final { scan-assembler "swap\\s+r0,r0" } } */
+/* { dg-final { scan-assembler "sexh_s\\s+r0,r0" } } */
+