This kind of transformation seems pretty generic and might be a
candidate for adding to the middle-end, perhaps as part of combine.
I noticed these happened more often for LRA, which is the reason I
went down this track of low-hanging-fruit micro-optimizations, which
are such an itch once noticed while inspecting generated code for
libgcc.
Unfortunately, this one improves coremark only by a few cycles at the
beginning or end (<0.0005%) for cris-elf -march=v10. The size of the
coremark code is down by 0.4% (0.22% pre-lra).
Using an iterator from the start because other binary operations will
be added and their define_peephole2's would look exactly the same for
the .md part.
Some existing AND-peephole2-related tests suffered, because many of
them used constants consisting only of contiguous 1-bits; those
constants have been adjusted.
Also, spotted and fixed, by adding a space, some
scan-assembler-strings that were prone to spurious identifier or file
name matches.
gcc:
* config/cris/cris.cc (cris_split_constant): New function.
* config/cris/cris.md (splitop): New iterator.
(opsplit1): New define_peephole2.
* config/cris/cris-protos.h (cris_split_constant): Declare.
(cris_splittable_constant_p): New macro.
gcc/testsuite:
* gcc.target/cris/peep2-andsplit1.c: New test.
* gcc.target/cris/peep2-andu1.c, gcc.target/cris/peep2-andu2.c,
gcc.target/cris/peep2-xsrand.c, gcc.target/cris/peep2-xsrand2.c:
Adjust values to avoid interference with "opsplit1" with AND. Add
whitespace to match-strings that may be confused with identifiers
or file names.
---
gcc/config/cris/cris-protos.h | 6 ++
gcc/config/cris/cris.cc | 78 +++++++++++++++++++
gcc/config/cris/cris.md | 26 +++++++
.../gcc.target/cris/peep2-andsplit1.c | 25 ++++++
gcc/testsuite/gcc.target/cris/peep2-andu1.c | 4 +-
gcc/testsuite/gcc.target/cris/peep2-andu2.c | 6 +-
gcc/testsuite/gcc.target/cris/peep2-xsrand.c | 6 +-
gcc/testsuite/gcc.target/cris/peep2-xsrand2.c | 6 +-
8 files changed, 146 insertions(+), 11 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/cris/peep2-andsplit1.c
@@ -44,6 +44,12 @@ extern rtx cris_emit_movem_store (rtx, rtx, int, bool);
extern rtx_insn *cris_emit_insn (rtx x);
extern void cris_order_for_addsi3 (rtx *, int);
extern void cris_emit_trap_for_misalignment (rtx);
+/* Split a constant operand into cheaper insns; defined in cris.cc.  The
+   predicate name is for callers relying on the defaults, which emit nothing.  */
+extern int cris_split_constant (HOST_WIDE_INT, enum rtx_code, machine_mode,
+                                bool, bool generate = false,
+                                rtx dest = NULL_RTX, rtx op = NULL_RTX);
+#define cris_splittable_constant_p cris_split_constant
#endif /* RTX_CODE */
extern void cris_asm_output_label_ref (FILE *, char *);
extern void cris_asm_output_ident (const char *);
@@ -2626,6 +2626,84 @@ cris_split_movdx (rtx *operands)
return val;
}
+/* Try to split the constant WVAL into a number of separate insns of less cost
+   for the rtx operation CODE and the metric SPEED than using WVAL as-is.
+   Generate those insns if GENERATE.  DEST holds the destination, and OP holds
+   the other operand for binary operations; NULL when CODE is SET.  Return the
+   number of insns for the operation or 0 if the constant can't be usefully
+   split (because it's already minimal or is not within range for the known
+   methods).  Parts stolen from arm.cc.  */
+
+int
+cris_split_constant (HOST_WIDE_INT wval, enum rtx_code code,
+                     machine_mode mode, bool speed ATTRIBUTE_UNUSED,
+                     bool generate, rtx dest, rtx op)
+{
+  int32_t ival = (int32_t) wval;
+  uint32_t uval = (uint32_t) wval;
+
+  /* Only AND is handled.  Leave alone small values (-32..31, presumably the
+     "quick" immediates -- confirm), and masks said to be implemented
+     elsewhere using movu.[bw] (255, 65535) or clear.[bw].  */
+  if (code != AND || IN_RANGE (ival, -32, 31)
+      || ival == 255 || ival == 65535
+      || uval == 0xffffff00 || uval == 0xffff0000)
+    return 0;
+
+  int msb_zeros = 0;
+  int lsb_zeros = 0;
+
+  /* Count leading zeros; an unsigned probe avoids overflow in 1 << 31.  */
+  for (int i = 31; i >= 0; i--)
+    {
+      if ((uval & ((uint32_t) 1 << i)) == 0)
+        msb_zeros++;
+      else
+        break;
+    }
+
+  /* Count number of trailing zeros.  */
+  for (int i = 0; i <= 31; i++)
+    {
+      if ((uval & ((uint32_t) 1 << i)) == 0)
+        lsb_zeros++;
+      else
+        break;
+    }
+
+  /* Is there a lowest or highest part that is zero (but not both)
+     and the non-zero part is just ones?  */
+  if (exact_log2 ((uval >> lsb_zeros) + 1) > 0
+      && (lsb_zeros != 0) != (msb_zeros != 0))
+    {
+      /* If so, we can shift OP in the zero direction, then back.  We don't
+         nominally win anything for uval < 256, except that the insns are split
+         into slottable insns so it's always beneficial.  */
+      if (generate)
+        {
+          if (mode != SImode)
+            {
+              dest = gen_rtx_REG (SImode, REGNO (dest));
+              op = gen_rtx_REG (SImode, REGNO (op));
+            }
+          /* The second shift reads DEST, holding the first shift's result.  */
+          if (msb_zeros)
+            {
+              emit_insn (gen_ashlsi3 (dest, op, GEN_INT (msb_zeros)));
+              emit_insn (gen_lshrsi3 (dest, dest, GEN_INT (msb_zeros)));
+            }
+          else
+            {
+              emit_insn (gen_lshrsi3 (dest, op, GEN_INT (lsb_zeros)));
+              emit_insn (gen_ashlsi3 (dest, dest, GEN_INT (lsb_zeros)));
+            }
+        }
+      return 2;
+    }
+
+  return 0;
+}
+
/* Try to change a comparison against a constant to be against zero, and
an unsigned compare against zero to be an equality test. Beware:
only valid for compares of integer-type operands. Also, note that we
@@ -208,6 +208,9 @@ (define_code_iterator plusminusumin [plus minus umin])
;; Ditto, commutative operators (i.e. not minus).
(define_code_iterator plusumin [plus umin])
+;; Operations handled by the "opsplit1" peephole2; just AND for now.
+(define_code_iterator splitop [and])
+
;; The addsubbo and nd code-attributes form a hack. We need to output
;; "addu.b", "subu.b" but "bound.b" (no "u"-suffix) which means we'd
;; need to refer to one iterator from the next. But, that can't be
@@ -2888,6 +2891,29 @@ (define_peephole2 ; andqu
operands[4] = GEN_INT (trunc_int_for_mode (INTVAL (operands[1]), QImode));
})
+;; Large (read: non-quick) numbers can sometimes be AND:ed by other means;
+;; see cris_split_constant.  Testcase: gcc.target/cris/peep2-andsplit1.c
+(define_peephole2 ; opsplit1
+ [(parallel
+ [(set (match_operand 0 "register_operand")
+ (splitop
+ (match_operand 1 "register_operand")
+ (match_operand 2 "const_int_operand")))
+ (clobber (reg:CC CRIS_CC0_REGNUM))])]
+ ;; Operands 0 and 1 can be separate identical objects, at least after
+ ;; matching peepholes above, hence comparing REGNO rather than the rtx.
+ "REGNO (operands[0]) == REGNO (operands[1])
+ && cris_splittable_constant_p (INTVAL (operands[2]), <CODE>,
+ GET_MODE (operands[0]),
+ optimize_function_for_speed_p (cfun))"
+ [(const_int 0)]
+{
+ cris_split_constant (INTVAL (operands[2]), <CODE>, GET_MODE (operands[0]),
+ optimize_function_for_speed_p (cfun),
+ true, operands[0], operands[0]);
+ DONE;
+})
+
;; Fix a decomposed szext: fuse it with the memory operand of the
;; load. This is typically the sign-extension part of a decomposed
;; "indirect offset" address.
new file mode 100644
@@ -0,0 +1,25 @@
+/* Check that "opsplit1" with AND does its job: no AND insn should remain. */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int al0 (int x)
+{
+ return x & 0x7fffffff; /* One leading zero bit.  */
+}
+
+int alN (int x)
+{
+ return x & 63; /* 26 leading zero bits.  */
+}
+
+int ar0 (int x)
+{
+ return x & (-32*2); /* 0xffffffc0: six trailing zero bits.  */
+}
+
+int arN (int x)
+{
+ return x & 0x80000000; /* 31 trailing zero bits.  */
+}
+
+/* { dg-final { scan-assembler-not "\[ \t\]and" } } */
@@ -20,13 +20,13 @@ clearb (int x, int *y)
int
andb (int x, int *y)
{
- return *y & 0x3f;
+ return *y & 0x3d;
}
int
andw (int x, int *y)
{
- return *y & 0xfff;
+ return *y & 0xffd;
}
int
@@ -1,6 +1,6 @@
/* { dg-do assemble } */
-/* { dg-final { scan-assembler "movu.w \\\$r10,\\\$|movu.w 2047," } } */
-/* { dg-final { scan-assembler "and.w 2047,\\\$|and.d \\\$r10," } } */
+/* { dg-final { scan-assembler "movu.w \\\$r10,\\\$|movu.w 2045," } } */
+/* { dg-final { scan-assembler "and.w 2045,\\\$|and.d \\\$r10," } } */
/* { dg-final { scan-assembler-not "move.d \\\$r10,\\\$" } } */
/* { dg-final { scan-assembler "movu.b \\\$r10,\\\$|movu.b 95," } } */
/* { dg-final { scan-assembler "and.b 95,\\\$|and.d \\\$r10," } } */
@@ -19,7 +19,7 @@
unsigned int
and_peep2_hi (unsigned int y, unsigned int *x)
{
- *x = y & 0x7ff;
+ *x = y & 0x7fd;
return y;
}
@@ -1,7 +1,7 @@
/* { dg-do compile } */
/* { dg-final { scan-assembler "and.w " } } */
/* { dg-final { scan-assembler "and.b " } } */
-/* { dg-final { scan-assembler-not "and.d" } } */
+/* { dg-final { scan-assembler-not "and.d " } } */
/* { dg-options "-O2" } */
/* Test the "asrandb", "asrandw", "lsrandb" and "lsrandw" peephole2:s
@@ -10,7 +10,7 @@
unsigned int
andwlsr (unsigned int x)
{
- return (x >> 17) & 0x7ff;
+ return (x >> 17) & 0x7fd;
}
unsigned int
@@ -22,7 +22,7 @@ andblsr (unsigned int x)
int
andwasr (int x)
{
- return (x >> 17) & 0x7ff;
+ return (x >> 17) & 0x7fd;
}
int
@@ -1,9 +1,9 @@
/* { dg-do compile } */
/* { dg-final { scan-assembler "and.w -137," } } */
-/* { dg-final { scan-assembler "and.b -64," } } */
+/* { dg-final { scan-assembler "and.b -62," } } */
/* { dg-final { scan-assembler "and.w -139," } } */
/* { dg-final { scan-assembler "and.b -63," } } */
-/* { dg-final { scan-assembler-not "and.d" } } */
+/* { dg-final { scan-assembler-not "and.d " } } */
/* { dg-options "-O2" } */
/* PR target/17984. Test-case based on
@@ -18,7 +18,7 @@ andwlsr (unsigned int x)
unsigned int
andblsr (unsigned int x)
{
- return (x >> 24) & 0xc0;
+ return (x >> 24) & 0xc2;
}
int