[ARC] Improved DImode rotates and right shifts by one bit.

Message ID 01fd01da10d6$e3c2ef60$ab48ce20$@nextmovesoftware.com
State Unresolved
Headers
Series [ARC] Improved DImode rotates and right shifts by one bit. |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

Roger Sayle Nov. 6, 2023, 5:30 p.m. UTC
  This patch improves the code generated for DImode right shifts (both
arithmetic and logical) by a single bit, and also for DImode rotates
(both left and right) by a single bit.  In approach, this is similar
to the recently added DImode left shift by a single bit patch, but
also builds upon i386.md's UNSPEC carry flag representation:
https://gcc.gnu.org/pipermail/gcc-patches/2023-October/632169.html

The benefits can be seen from the four new test cases:

long long ashr(long long x) { return x >> 1; }

Before:
ashr:   asl     r2,r1,31
        lsr_s   r0,r0
        or_s    r0,r0,r2
        j_s.d   [blink]
        asr_s   r1,r1,1

After:
ashr:   asr.f   r1,r1
        j_s.d   [blink]
        rrc     r0,r0

unsigned long long lshr(unsigned long long x) { return x >> 1; }

Before:
lshr:   asl     r2,r1,31
        lsr_s   r0,r0
        or_s    r0,r0,r2
        j_s.d   [blink]
        lsr_s   r1,r1

After:
lshr:   lsr.f   r1,r1
        j_s.d   [blink]
        rrc     r0,r0

unsigned long long rotl(unsigned long long x) { return (x<<1) | (x>>63); }

Before:
rotl:   lsr     r12,r1,31
        lsr     r2,r0,31
        asl_s   r3,r0,1
        asl_s   r1,r1,1
        or      r0,r12,r3
        j_s.d   [blink]
        or_s    r1,r1,r2

After:
rotl:   add.f   r0,r0,r0
        adc.f   r1,r1,r1
        j_s.d   [blink]
        add.cs  r0,r0,1

unsigned long long rotr(unsigned long long x) { return (x>>1) | (x<<63); }

Before:
rotr:   asl     r12,r1,31
        asl     r2,r0,31
        lsr_s   r3,r0
        lsr_s   r1,r1
        or      r0,r12,r3
        j_s.d   [blink]
        or_s    r1,r1,r2

After:
rotr:   asr.f   0,r0
        rrc.f   r1,r1
        j_s.d   [blink]
        rrc     r0,r0

On CPUs without a barrel shifter the improvements are even better.

Tested with a cross-compiler to arc-linux hosted on x86_64,
with no new (compile-only) regressions from make -k check.
Ok for mainline if this passes Claudiu's nightly testing?


2023-11-06  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
        * config/arc/arc.md (UNSPEC_ARC_CC_NEZ): New UNSPEC that
        represents the carry flag being set if the operand is non-zero.
        (adc_f): New define_insn representing adc with updated flags.
        (ashrdi3): New define_expand that only handles shifts by 1.
        (ashrdi3_cnt1): New pre-reload define_insn_and_split.
        (lshrdi3): New define_expand that only handles shifts by 1.
        (lshrdi3_cnt1): New pre-reload define_insn_and_split.
        (rrcsi2): New define_insn for rrc (SImode rotate right through
carry).
        (rrcsi2_carry): Likewise for rrc.f, as above but updating flags.
        (rotldi3): New define_expand that only handles rotates by 1.
        (rotldi3_cnt1): New pre-reload define_insn_and_split.
        (rotrdi3): New define_expand that only handles rotates by 1.
        (rotrdi3_cnt1): New pre-reload define_insn_and_split.
        (lshrsi3_cnt1_carry): New define_insn for lsr.f.
        (ashrsi3_cnt1_carry): New define_insn for asr.f.
        (btst_0_carry): New define_insn for asr.f without result.

gcc/testsuite/ChangeLog
        * gcc.target/arc/ashrdi3-1.c: New test case.
        * gcc.target/arc/lshrdi3-1.c: Likewise.
        * gcc.target/arc/rotldi3-1.c: Likewise.
        * gcc.target/arc/rotrdi3-1.c: Likewise.


Thanks in advance,
Roger
--
  

Comments

Claudiu Zissulescu Nov. 13, 2023, 8:15 a.m. UTC | #1
Looks good too. Please proceed with your commit.

Thank you for your contribution,
//Claudiu

-----Original Message-----
From: Roger Sayle <roger@nextmovesoftware.com> 
Sent: Monday, November 6, 2023 7:30 PM
To: gcc-patches@gcc.gnu.org
Cc: 'Claudiu Zissulescu' <claziss@gmail.com>
Subject: [ARC PATCH] Improved DImode rotates and right shifts by one bit.


This patch improves the code generated for DImode right shifts (both arithmetic and logical) by a single bit, and also for DImode rotates (both left and right) by a single bit.  In approach, this is similar to the recently added DImode left shift by a single bit patch, but also builds upon i386.md's UNSPEC carry flag representation:
https://urldefense.com/v3/__https://gcc.gnu.org/pipermail/gcc-patches/2023-October/632169.html__;!!A4F2R9G_pg!fxasivX0MLFBbgzab_TjnY9wQnIao29buOVHUv6gvPzOS-W4IWfIdwse4TRg__ek2AEplJ7BpYrTYBt1hB8mUCM$ 

The benefits can be seen from the four new test cases:

long long ashr(long long x) { return x >> 1; }

Before:
ashr:   asl     r2,r1,31
        lsr_s   r0,r0
        or_s    r0,r0,r2
        j_s.d   [blink]
        asr_s   r1,r1,1

After:
ashr:   asr.f   r1,r1
        j_s.d   [blink]
        rrc     r0,r0

unsigned long long lshr(unsigned long long x) { return x >> 1; }

Before:
lshr:   asl     r2,r1,31
        lsr_s   r0,r0
        or_s    r0,r0,r2
        j_s.d   [blink]
        lsr_s   r1,r1

After:
lshr:   lsr.f   r1,r1
        j_s.d   [blink]
        rrc     r0,r0

unsigned long long rotl(unsigned long long x) { return (x<<1) | (x>>63); }

Before:
rotl:   lsr     r12,r1,31
        lsr     r2,r0,31
        asl_s   r3,r0,1
        asl_s   r1,r1,1
        or      r0,r12,r3
        j_s.d   [blink]
        or_s    r1,r1,r2

After:
rotl:   add.f   r0,r0,r0
        adc.f   r1,r1,r1
        j_s.d   [blink]
        add.cs  r0,r0,1

unsigned long long rotr(unsigned long long x) { return (x>>1) | (x<<63); }

Before:
rotr:   asl     r12,r1,31
        asl     r2,r0,31
        lsr_s   r3,r0
        lsr_s   r1,r1
        or      r0,r12,r3
        j_s.d   [blink]
        or_s    r1,r1,r2

After:
rotr:   asr.f   0,r0
        rrc.f   r1,r1
        j_s.d   [blink]
        rrc     r0,r0

On CPUs without a barrel shifter the improvements are even better.

Tested with a cross-compiler to arc-linux hosted on x86_64, with no new (compile-only) regressions from make -k check.
Ok for mainline if this passes Claudiu's nightly testing?


2023-11-06  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
        * config/arc/arc.md (UNSPEC_ARC_CC_NEZ): New UNSPEC that
        represents the carry flag being set if the operand is non-zero.
        (adc_f): New define_insn representing adc with updated flags.
        (ashrdi3): New define_expand that only handles shifts by 1.
        (ashrdi3_cnt1): New pre-reload define_insn_and_split.
        (lshrdi3): New define_expand that only handles shifts by 1.
        (lshrdi3_cnt1): New pre-reload define_insn_and_split.
        (rrcsi2): New define_insn for rrc (SImode rotate right through carry).
        (rrcsi2_carry): Likewise for rrc.f, as above but updating flags.
        (rotldi3): New define_expand that only handles rotates by 1.
        (rotldi3_cnt1): New pre-reload define_insn_and_split.
        (rotrdi3): New define_expand that only handles rotates by 1.
        (rotrdi3_cnt1): New pre-reload define_insn_and_split.
        (lshrsi3_cnt1_carry): New define_insn for lsr.f.
        (ashrsi3_cnt1_carry): New define_insn for asr.f.
        (btst_0_carry): New define_insn for asr.f without result.

gcc/testsuite/ChangeLog
        * gcc.target/arc/ashrdi3-1.c: New test case.
        * gcc.target/arc/lshrdi3-1.c: Likewise.
        * gcc.target/arc/rotldi3-1.c: Likewise.
        * gcc.target/arc/rotrdi3-1.c: Likewise.


Thanks in advance,
Roger
--
  

Patch

diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index 7702978..97231b9 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -137,6 +137,7 @@ 
   UNSPEC_ARC_VMAC2HU
   UNSPEC_ARC_VMPY2H
   UNSPEC_ARC_VMPY2HU
+  UNSPEC_ARC_CC_NEZ
 
   VUNSPEC_ARC_RTIE
   VUNSPEC_ARC_SYNC
@@ -2790,6 +2791,31 @@  archs4x, archs4xd"
    (set_attr "type" "cc_arith")
    (set_attr "length" "4,4,4,4,8,8")])
 
+(define_insn "adc_f"
+  [(set (reg:CC_C CC_REG)
+	(compare:CC_C
+	  (zero_extend:DI
+	    (plus:SI
+	      (plus:SI
+		(ltu:SI (reg:CC_C CC_REG) (const_int 0))
+		(match_operand:SI 1 "register_operand" "%r"))
+	      (match_operand:SI 2 "register_operand" "r")))
+	  (plus:DI
+	    (ltu:DI (reg:CC_C CC_REG) (const_int 0))
+	    (zero_extend:DI (match_dup 1)))))
+   (set (match_operand:SI 0 "register_operand" "=r")
+	(plus:SI
+	  (plus:SI
+	    (ltu:SI (reg:CC_C CC_REG) (const_int 0))
+	    (match_dup 1))
+	  (match_dup 2)))]
+  ""
+  "adc.f\\t%0,%1,%2"
+  [(set_attr "cond" "set")
+   (set_attr "predicable" "no")
+   (set_attr "type" "cc_arith")
+   (set_attr "length" "4")])
+
 ; combiner-splitter cmp / scc -> cmp / adc
 (define_split
   [(set (match_operand:SI 0 "dest_reg_operand" "")
@@ -3530,6 +3556,68 @@  archs4x, archs4xd"
   ""
   [(set_attr "length" "8")])
 
+(define_expand "ashrdi3"
+  [(parallel
+      [(set (match_operand:DI 0 "register_operand")
+	    (ashiftrt:DI (match_operand:DI 1 "register_operand")
+			 (match_operand:QI 2 "const_int_operand")))
+       (clobber (reg:CC CC_REG))])]
+  ""
+{
+  if (operands[2] != const1_rtx)
+    FAIL;
+})
+
+;; Split into asr.f hi; rrc lo
+(define_insn_and_split "*ashrdi3_cnt1"
+  [(set (match_operand:DI 0 "register_operand")
+	(ashiftrt:DI (match_operand:DI 1 "register_operand")
+		     (const_int 1)))
+   (clobber (reg:CC CC_REG))]
+  "arc_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_ashrsi3_cnt1_carry (gen_highpart (SImode, operands[0]),
+				     gen_highpart (SImode, operands[1])));
+  emit_insn (gen_rrcsi2 (gen_lowpart (SImode, operands[0]),
+			 gen_lowpart (SImode, operands[1])));
+  DONE;
+}
+  [(set_attr "length" "8")])
+
+(define_expand "lshrdi3"
+  [(parallel
+      [(set (match_operand:DI 0 "register_operand")
+	    (lshiftrt:DI (match_operand:DI 1 "register_operand")
+			 (match_operand:QI 2 "const_int_operand")))
+       (clobber (reg:CC CC_REG))])]
+  ""
+{
+  if (operands[2] != const1_rtx)
+    FAIL;
+})
+
+;; Split into lsr.f hi; rrc lo
+(define_insn_and_split "*lshrdi3_cnt1"
+  [(set (match_operand:DI 0 "register_operand")
+	(lshiftrt:DI (match_operand:DI 1 "register_operand")
+		     (const_int 1)))
+   (clobber (reg:CC CC_REG))]
+  "arc_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_lshrsi3_cnt1_carry (gen_highpart (SImode, operands[0]),
+				     gen_highpart (SImode, operands[1])));
+  emit_insn (gen_rrcsi2 (gen_lowpart (SImode, operands[0]),
+			 gen_lowpart (SImode, operands[1])));
+  DONE;
+}
+  [(set_attr "length" "8")])
+
 ;; Rotate instructions.
 
 (define_insn "rotrsi3_insn"
@@ -3571,6 +3659,103 @@  archs4x, archs4xd"
     }
 })
 
+;; Rotate through carry flag
+
+(define_insn "rrcsi2"
+  [(set (match_operand:SI 0 "dest_reg_operand" "=r")
+	(plus:SI
+	  (lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
+		       (const_int 1))
+	  (ashift:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0))
+		     (const_int 31))))]
+  ""
+  "rrc\\t%0,%1"
+  [(set_attr "type" "shift")
+   (set_attr "predicable" "no")
+   (set_attr "length" "4")])
+
+(define_insn "rrcsi2_carry"
+  [(set (reg:CC_C CC_REG)
+	(unspec:CC_C [(and:SI (match_operand:SI 1 "register_operand" "r")
+			      (const_int 1))] UNSPEC_ARC_CC_NEZ))
+   (set (match_operand:SI 0 "dest_reg_operand" "=r")
+	(plus:SI
+	  (lshiftrt:SI (match_dup 1) (const_int 1))
+	  (ashift:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0))
+		     (const_int 31))))]
+  ""
+  "rrc.f\\t%0,%1"
+  [(set_attr "type" "shift")
+   (set_attr "predicable" "no")
+   (set_attr "length" "4")])
+
+;; DImode Rotate instructions
+
+(define_expand "rotldi3"
+  [(parallel
+      [(set (match_operand:DI 0 "register_operand")
+	    (rotate:DI (match_operand:DI 1 "register_operand")
+		       (match_operand:QI 2 "const_int_operand")))
+       (clobber (reg:CC CC_REG))])]
+  ""
+{
+  if (operands[2] != const1_rtx)
+    FAIL;
+})
+
+;; split into add.f lo; adc.f hi; adc lo
+(define_insn_and_split "*rotldi3_cnt1"
+  [(set (match_operand:DI 0 "register_operand")
+	(rotate:DI (match_operand:DI 1 "register_operand")
+		   (const_int 1)))
+   (clobber (reg:CC CC_REG))]
+  "arc_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx lo0 = gen_lowpart (SImode, operands[0]);
+  rtx lo1 = gen_lowpart (SImode, operands[1]);
+  rtx hi1 = gen_highpart (SImode, operands[1]);
+  emit_insn (gen_add_f (lo0, lo1, lo1));
+  emit_insn (gen_adc_f (gen_highpart (SImode, operands[0]), hi1, hi1));
+  emit_insn (gen_adc (lo0, lo0, const0_rtx));
+  DONE;
+}
+  [(set_attr "length" "12")])
+
+(define_expand "rotrdi3"
+  [(parallel
+      [(set (match_operand:DI 0 "register_operand")
+	    (rotatert:DI (match_operand:DI 1 "register_operand")
+			 (match_operand:QI 2 "const_int_operand")))
+       (clobber (reg:CC CC_REG))])]
+  ""
+{
+  if (operands[2] != const1_rtx)
+    FAIL;
+})
+
+;; split into asr.f lo; rrc.f hi; rrc lo
+(define_insn_and_split "*rotrdi3_cnt1"
+  [(set (match_operand:DI 0 "register_operand")
+	(rotatert:DI (match_operand:DI 1 "register_operand")
+		     (const_int 1)))
+   (clobber (reg:CC CC_REG))]
+  "arc_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx lo = gen_lowpart (SImode, operands[1]);
+  emit_insn (gen_btst_0_carry (lo));
+  emit_insn (gen_rrcsi2_carry (gen_highpart (SImode, operands[0]),
+			       gen_highpart (SImode, operands[1])));
+  emit_insn (gen_rrcsi2 (gen_lowpart (SImode, operands[0]), lo));
+  DONE;
+}
+  [(set_attr "length" "12")])
+
 ;; Compare / branch instructions.
 
 (define_expand "cbranchsi4"
@@ -6022,6 +6207,18 @@  archs4x, archs4xd"
    (set_attr "iscompact" "maybe,false")
    (set_attr "predicable" "no,no")])
 
+(define_insn "lshrsi3_cnt1_carry"
+  [(set (reg:CC_C CC_REG)
+	(unspec:CC_C [(and:SI (match_operand:SI 1 "register_operand" "r")
+			      (const_int 1))] UNSPEC_ARC_CC_NEZ))
+   (set (match_operand:SI 0 "dest_reg_operand" "=r")
+	(lshiftrt:SI (match_dup 1) (const_int 1)))]
+  ""
+  "lsr.f\\t%0,%1"
+  [(set_attr "type" "unary")
+   (set_attr "length" "4")
+   (set_attr "predicable" "no")])
+
 (define_insn "ashrsi3_cnt1"
   [(set (match_operand:SI 0 "dest_reg_operand"             "=q,w")
 	(ashiftrt:SI (match_operand:SI 1 "register_operand" "q,c")
@@ -6032,6 +6229,28 @@  archs4x, archs4xd"
    (set_attr "iscompact" "maybe,false")
    (set_attr "predicable" "no,no")])
 
+(define_insn "ashrsi3_cnt1_carry"
+  [(set (reg:CC_C CC_REG)
+	(unspec:CC_C [(and:SI (match_operand:SI 1 "register_operand" "r")
+			      (const_int 1))] UNSPEC_ARC_CC_NEZ))
+   (set (match_operand:SI 0 "dest_reg_operand" "=r")
+	(ashiftrt:SI (match_dup 1) (const_int 1)))]
+  ""
+  "asr.f\\t%0,%1"
+  [(set_attr "type" "unary")
+   (set_attr "length" "4")
+   (set_attr "predicable" "no")])
+
+(define_insn "btst_0_carry"
+  [(set (reg:CC_C CC_REG)
+	(unspec:CC_C [(and:SI (match_operand:SI 0 "register_operand" "r")
+			      (const_int 1))] UNSPEC_ARC_CC_NEZ))]
+  ""
+  "asr.f\\t0,%0"
+  [(set_attr "type" "unary")
+   (set_attr "length" "4")
+   (set_attr "predicable" "no")])
+
 (define_peephole2
   [(set (match_operand:SI 0 "register_operand" "")
 	(zero_extract:SI (match_dup 0)
diff --git a/gcc/testsuite/gcc.target/arc/ashrdi3-1.c b/gcc/testsuite/gcc.target/arc/ashrdi3-1.c
new file mode 100644
index 0000000..d990bfd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/ashrdi3-1.c
@@ -0,0 +1,10 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+long long foo(long long x)
+{
+  return x >> 1;
+}
+
+/* { dg-final { scan-assembler "asr.f\\s+r1,r1" } } */
+/* { dg-final { scan-assembler "rrc\\s+r0,r0" } } */
diff --git a/gcc/testsuite/gcc.target/arc/lshrdi3-1.c b/gcc/testsuite/gcc.target/arc/lshrdi3-1.c
new file mode 100644
index 0000000..6542ffd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/lshrdi3-1.c
@@ -0,0 +1,10 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned long long foo(unsigned long long x)
+{
+  return x >> 1;
+}
+
+/* { dg-final { scan-assembler "lsr.f\\s+r1,r1" } } */
+/* { dg-final { scan-assembler "rrc\\s+r0,r0" } } */
diff --git a/gcc/testsuite/gcc.target/arc/rotldi3-1.c b/gcc/testsuite/gcc.target/arc/rotldi3-1.c
new file mode 100644
index 0000000..325996e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/rotldi3-1.c
@@ -0,0 +1,11 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned long long foo(unsigned long long x)
+{
+  return (x << 1) | (x >> 63);
+}
+
+/* { dg-final { scan-assembler "add.f\\s+r0,r0,r0" } } */
+/* { dg-final { scan-assembler "adc.f\\s+r1,r1,r1" } } */
+/* { dg-final { scan-assembler "add.cs\\s+r0,r0,1" } } */
diff --git a/gcc/testsuite/gcc.target/arc/rotrdi3-1.c b/gcc/testsuite/gcc.target/arc/rotrdi3-1.c
new file mode 100644
index 0000000..cd8e0de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/rotrdi3-1.c
@@ -0,0 +1,11 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned long long foo(unsigned long long x)
+{
+  return (x >> 1) | (x << 63);
+}
+
+/* { dg-final { scan-assembler "asr.f\\s+0,r0" } } */
+/* { dg-final { scan-assembler "rrc.f\\s+r1,r1" } } */
+/* { dg-final { scan-assembler "rrc\\s+r0,r0" } } */