[committed] CRIS: peephole2 an add into two addq or subq
Checks
Commit Message
Unfortunately, this doesn't cause a performance improvement for coremark,
but it happens a few times in newlib, just enough to affect coremark
by 0.01% in size (or 4 bytes, and three cycles; __fwalk_sglue and
__vfiprintf_r account for two bytes each).
gcc:
* config/cris/cris.md (splitop): Add PLUS.
* config/cris/cris.cc (cris_split_constant): Also handle
PLUS when a split into two insns may be useful.
gcc/testsuite:
* gcc.target/cris/peep2-addsplit1.c: New test.
---
gcc/config/cris/cris.cc | 25 +++++++-
gcc/config/cris/cris.md | 6 +-
.../gcc.target/cris/peep2-addsplit1.c | 59 +++++++++++++++++++
3 files changed, 88 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/cris/peep2-addsplit1.c
@@ -2642,7 +2642,30 @@ cris_split_constant (HOST_WIDE_INT wval, enum rtx_code code,
int32_t ival = (int32_t) wval;
uint32_t uval = (uint32_t) wval;
- if (code != AND || IN_RANGE(ival, -32, 31)
+ /* Can we do with two addq or two subq, improving chances of filling a
+ delay-slot? At worst, we break even, both performance and
+ size-wise. */
+ if (code == PLUS
+ && (IN_RANGE (ival, -63 * 2, -63 - 1)
+ || IN_RANGE (ival, 63 + 1, 63 * 2)))
+ {
+ if (generate)
+ {
+ int sign = ival < 0 ? -1 : 1;
+ int aval = abs (ival);
+
+ if (mode != SImode)
+ {
+ dest = gen_rtx_REG (SImode, REGNO (dest));
+ op = gen_rtx_REG (SImode, REGNO (op));
+ }
+ emit_insn (gen_addsi3 (dest, op, GEN_INT (63 * sign)));
+ emit_insn (gen_addsi3 (dest, op, GEN_INT ((aval - 63) * sign)));
+ }
+ return 2;
+ }
+
+ if (code != AND || IN_RANGE (ival, -32, 31)
/* Implemented using movu.[bw] elsewhere. */
|| ival == 255 || ival == 65535
/* Implemented using clear.[bw] elsewhere. */
@@ -209,7 +209,7 @@ (define_code_iterator plusminusumin [plus minus umin])
(define_code_iterator plusumin [plus umin])
;; For opsplit1.
-(define_code_iterator splitop [and])
+(define_code_iterator splitop [and plus])
;; The addsubbo and nd code-attributes form a hack. We need to output
;; "addu.b", "subu.b" but "bound.b" (no "u"-suffix) which means we'd
@@ -2984,6 +2984,10 @@ (define_peephole2 ; movandsplit1
;; Large (read: non-quick) numbers can sometimes be AND:ed by other means.
;; Testcase: gcc.target/cris/peep2-andsplit1.c
+;;
+;; Another case is add<ext> N,rx with -126..-64,64..126: it has the same
+;; size and execution time as two addq or subq, but addq and subq can fill
+;; a delay-slot.
(define_peephole2 ; opsplit1
[(parallel
[(set (match_operand 0 "register_operand")
new file mode 100644
@@ -0,0 +1,52 @@
+/* Check that "opsplit1" with PLUS does its job. */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-leading-underscore" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+int addsi (int x)
+{
+ return x + 64;
+}
+
+char addqi (char x)
+{
+ return x + 126;
+}
+
+short addhi (short x)
+{
+ return x - 64;
+}
+
+unsigned short addhi2 (short x)
+{
+ return x - 126;
+}
+
+/*
+** addsi:
+** addq 63,.r10
+** ret
+** addq 1,.r10
+*/
+
+/*
+** addqi:
+** addq 63,.r10
+** ret
+** addq 63,.r10
+*/
+
+/*
+** addhi:
+** subq 63,.r10
+** ret
+** subq 1,.r10
+*/
+
+/*
+** addhi2:
+** subq 63,.r10
+** ret
+** subq 63,.r10
+*/