[committed] CRIS: peephole2 an "and" with a contiguous "one-sided" sequence of 1s

Message ID 20230504004748.E0F7D20416@pchp3.se.axis.com
State Repeat Merge
Headers
Series [committed] CRIS: peephole2 an "and" with a contiguous "one-sided" sequence of 1s |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

Hans-Peter Nilsson May 4, 2023, 12:47 a.m. UTC
  This kind of transformation seems pretty generic and might be a
candidate for adding to the middle-end, perhaps as part of combine.

I noticed these happened more often for LRA, which is the reason I
went on this track of low-hanging-fruit micro-optimizations; they are
such an itch when noticed while inspecting generated code for libgcc.
Unfortunately, this one improves coremark only by a few cycles at the
beginning or end (<0.0005%) for cris-elf -march=v10.  The size of the
coremark code is down by 0.4% (0.22% pre-lra).

Using an iterator from the start because other binary operations will
be added and their define_peephole2's would look exactly the same for
the .md part.

Some existing and-peephole2-related tests suffered, because many of
them were using patterns with only contiguous 1:s in them: adjusted.
Also, spotted and fixed, by adding a space, some
scan-assembler-strings that were prone to spurious identifier or file
name matches.

gcc:
	* config/cris/cris.cc (cris_split_constant): New function.
	* config/cris/cris.md (splitop): New iterator.
	(opsplit1): New define_peephole2.
	* config/cris/cris-protos.h (cris_split_constant): Declare.
	(cris_splittable_constant_p): New macro.

gcc/testsuite:
	* gcc.target/cris/peep2-andsplit1.c: New test.
	* gcc.target/cris/peep2-andu1.c, gcc.target/cris/peep2-andu2.c,
	gcc.target/cris/peep2-xsrand.c, gcc.target/cris/peep2-xsrand2.c:
	Adjust values to avoid interference with "opsplit1" with AND.  Add
	whitespace to match-strings that may be confused with identifiers
	or file names.
---
 gcc/config/cris/cris-protos.h                 |  6 ++
 gcc/config/cris/cris.cc                       | 78 +++++++++++++++++++
 gcc/config/cris/cris.md                       | 26 +++++++
 .../gcc.target/cris/peep2-andsplit1.c         | 25 ++++++
 gcc/testsuite/gcc.target/cris/peep2-andu1.c   |  4 +-
 gcc/testsuite/gcc.target/cris/peep2-andu2.c   |  6 +-
 gcc/testsuite/gcc.target/cris/peep2-xsrand.c  |  6 +-
 gcc/testsuite/gcc.target/cris/peep2-xsrand2.c |  6 +-
 8 files changed, 146 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/cris/peep2-andsplit1.c
  

Patch

diff --git a/gcc/config/cris/cris-protos.h b/gcc/config/cris/cris-protos.h
index de9eacbae2aa..666e04f9eeec 100644
--- a/gcc/config/cris/cris-protos.h
+++ b/gcc/config/cris/cris-protos.h
@@ -44,6 +44,12 @@  extern rtx cris_emit_movem_store (rtx, rtx, int, bool);
 extern rtx_insn *cris_emit_insn (rtx x);
 extern void cris_order_for_addsi3 (rtx *, int);
 extern void cris_emit_trap_for_misalignment (rtx);
+extern int cris_split_constant (HOST_WIDE_INT, enum rtx_code,
+				machine_mode, bool,
+				bool generate = false, /* Emit the insns?  */
+				rtx dest = NULL_RTX,   /* Result register.  */
+				rtx op = NULL_RTX);    /* Other operand.  */
+#define cris_splittable_constant_p cris_split_constant /* Predicate alias.  */
 #endif /* RTX_CODE */
 extern void cris_asm_output_label_ref (FILE *, char *);
 extern void cris_asm_output_ident (const char *);
diff --git a/gcc/config/cris/cris.cc b/gcc/config/cris/cris.cc
index 05dead9c0778..331f5908a538 100644
--- a/gcc/config/cris/cris.cc
+++ b/gcc/config/cris/cris.cc
@@ -2626,6 +2626,84 @@  cris_split_movdx (rtx *operands)
   return val;
 }
 
+/* Try to split the constant WVAL into a number of separate insns of less cost
+   for the rtx operation CODE in MODE and the metric SPEED than using WVAL
+   as-is.  Generate those insns if GENERATE.  DEST holds the destination, and
+   OP holds the other operand for binary operations; NULL when CODE is SET.
+   Return the number of insns for the operation or 0 if the constant can't be
+   usefully split (because it's already minimal or is not within range for the
+   known methods).  Parts stolen from arm.cc.  */
+
+int
+cris_split_constant (HOST_WIDE_INT wval, enum rtx_code code,
+		     machine_mode mode, bool speed ATTRIBUTE_UNUSED,
+		     bool generate, rtx dest, rtx op)
+{
+  int32_t ival = (int32_t) wval;
+  uint32_t uval = (uint32_t) wval;
+
+  if (code != AND || IN_RANGE (ival, -32, 31)
+      /* Implemented using movu.[bw] elsewhere.  */
+      || ival == 255 || ival == 65535
+      /* Implemented using clear.[bw] elsewhere.  */
+      || uval == 0xffffff00 || uval == 0xffff0000)
+    return 0;
+
+  int msb_zeros = 0;
+  int lsb_zeros = 0;
+
+  /* Count leading zeros; the mask is unsigned so bit 31 is safe.  */
+  for (int i = 31; i >= 0; i--)
+    {
+      if ((uval & ((uint32_t) 1 << i)) == 0)
+	msb_zeros++;
+      else
+	break;
+    }
+
+  /* Count number of trailing zeros.  */
+  for (int i = 0; i <= 31; i++)
+    {
+      if ((uval & ((uint32_t) 1 << i)) == 0)
+	lsb_zeros++;
+      else
+	break;
+    }
+
+  /* Is there a lowest or highest part that is zero (but not both)
+     and the non-zero part is just ones?  */
+  if (exact_log2 ((uval >> lsb_zeros) + 1) > 0
+      && (lsb_zeros != 0) != (msb_zeros != 0))
+    {
+      /* If so, we can shift OP in the zero direction, then back.  We don't
+	 nominally win anything for uval < 256, except that the insns are split
+	 into slottable insns so it's always beneficial.  */
+      if (generate)
+	{
+	  if (mode != SImode)
+	    {
+	      dest = gen_rtx_REG (SImode, REGNO (dest));
+	      op = gen_rtx_REG (SImode, REGNO (op));
+	    }
+	  /* The second shift must read DEST, where the first one left its
+	     result; reading OP again is only right when OP is DEST.  */
+	  if (msb_zeros)
+	    {
+	      emit_insn (gen_ashlsi3 (dest, op, GEN_INT (msb_zeros)));
+	      emit_insn (gen_lshrsi3 (dest, dest, GEN_INT (msb_zeros)));
+	    }
+	  else
+	    {
+	      emit_insn (gen_lshrsi3 (dest, op, GEN_INT (lsb_zeros)));
+	      emit_insn (gen_ashlsi3 (dest, dest, GEN_INT (lsb_zeros)));
+	    }
+	}
+      return 2;
+    }
+
+  return 0;
+}
+
 /* Try to change a comparison against a constant to be against zero, and
    an unsigned compare against zero to be an equality test.  Beware:
    only valid for compares of integer-type operands.  Also, note that we
diff --git a/gcc/config/cris/cris.md b/gcc/config/cris/cris.md
index 366b4bc304bf..e72943b942e5 100644
--- a/gcc/config/cris/cris.md
+++ b/gcc/config/cris/cris.md
@@ -208,6 +208,9 @@  (define_code_iterator plusminusumin [plus minus umin])
 ;; Ditto, commutative operators (i.e. not minus).
 (define_code_iterator plusumin [plus umin])
 
+;; For opsplit1.
+(define_code_iterator splitop [and])
+
 ;; The addsubbo and nd code-attributes form a hack.  We need to output
 ;; "addu.b", "subu.b" but "bound.b" (no "u"-suffix) which means we'd
 ;; need to refer to one iterator from the next.  But, that can't be
@@ -2888,6 +2891,29 @@  (define_peephole2 ; andqu
   operands[4] = GEN_INT (trunc_int_for_mode (INTVAL (operands[1]), QImode));
 })
 
+;; Large (read: non-quick) numbers can sometimes be AND:ed by other means.
+;; Testcase: gcc.target/cris/peep2-andsplit1.c
+(define_peephole2 ; opsplit1
+  [(parallel
+    [(set (match_operand 0 "register_operand")
+	  (splitop
+	   (match_operand 1 "register_operand")
+	   (match_operand 2 "const_int_operand")))
+     (clobber (reg:CC CRIS_CC0_REGNUM))])]
+   ;; Operands 0 and 1 can be separate identical objects, at least after
+   ;; matching peepholes above, hence the comparison by REGNO.
+  "REGNO (operands[0]) == REGNO (operands[1])
+   && cris_splittable_constant_p (INTVAL (operands[2]), <CODE>,
+				  GET_MODE (operands[0]),
+				  optimize_function_for_speed_p (cfun))"
+  [(const_int 0)]
+{
+  cris_split_constant (INTVAL (operands[2]), <CODE>, GET_MODE (operands[0]),
+		       optimize_function_for_speed_p (cfun),
+		       true, operands[0], operands[0]);
+  DONE;
+})
+
 ;; Fix a decomposed szext: fuse it with the memory operand of the
 ;; load.  This is typically the sign-extension part of a decomposed
 ;; "indirect offset" address.
diff --git a/gcc/testsuite/gcc.target/cris/peep2-andsplit1.c b/gcc/testsuite/gcc.target/cris/peep2-andsplit1.c
new file mode 100644
index 000000000000..18b5cb8b17b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/cris/peep2-andsplit1.c
@@ -0,0 +1,25 @@ 
+/* Check that "opsplit1" with AND does its job.  */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int al0 (int x)
+{
+  return x & 0x7fffffff; /* Mask: bit 31 clear, bits 0..30 set.  */
+}
+
+int alN (int x)
+{
+  return x & 63; /* Mask: bits 0..5 set, the rest clear.  */
+}
+
+int ar0 (int x)
+{
+  return x & (-32*2); /* Mask: -64, i.e. bits 0..5 clear, the rest set.  */
+}
+
+int arN (int x)
+{
+  return x & 0x80000000; /* Mask: only bit 31 set.  */
+}
+
+/* { dg-final { scan-assembler-not "\[ \t\]and" } } */
diff --git a/gcc/testsuite/gcc.target/cris/peep2-andu1.c b/gcc/testsuite/gcc.target/cris/peep2-andu1.c
index 3b54c3295860..ab307b4fec5e 100644
--- a/gcc/testsuite/gcc.target/cris/peep2-andu1.c
+++ b/gcc/testsuite/gcc.target/cris/peep2-andu1.c
@@ -20,13 +20,13 @@  clearb (int x, int *y)
 int
 andb (int x, int *y)
 {
-  return *y & 0x3f;
+  return *y & 0x3d;
 }
 
 int
 andw (int x, int *y)
 {
-  return *y & 0xfff;
+  return *y & 0xffd;
 }
 
 int
diff --git a/gcc/testsuite/gcc.target/cris/peep2-andu2.c b/gcc/testsuite/gcc.target/cris/peep2-andu2.c
index fd19cdd906a9..f16f28861ac1 100644
--- a/gcc/testsuite/gcc.target/cris/peep2-andu2.c
+++ b/gcc/testsuite/gcc.target/cris/peep2-andu2.c
@@ -1,6 +1,6 @@ 
 /* { dg-do assemble } */
-/* { dg-final { scan-assembler "movu.w \\\$r10,\\\$|movu.w 2047," } } */
-/* { dg-final { scan-assembler "and.w 2047,\\\$|and.d \\\$r10," } } */
+/* { dg-final { scan-assembler "movu.w \\\$r10,\\\$|movu.w 2045," } } */
+/* { dg-final { scan-assembler "and.w 2045,\\\$|and.d \\\$r10," } } */
 /* { dg-final { scan-assembler-not "move.d \\\$r10,\\\$" } } */
 /* { dg-final { scan-assembler "movu.b \\\$r10,\\\$|movu.b 95," } } */
 /* { dg-final { scan-assembler "and.b 95,\\\$|and.d \\\$r10," } } */
@@ -19,7 +19,7 @@ 
 unsigned int
 and_peep2_hi (unsigned int y, unsigned int *x)
 {
-  *x = y & 0x7ff;
+  *x = y & 0x7fd;
   return y;
 }
 
diff --git a/gcc/testsuite/gcc.target/cris/peep2-xsrand.c b/gcc/testsuite/gcc.target/cris/peep2-xsrand.c
index df0e76886b47..9531f0a10ffe 100644
--- a/gcc/testsuite/gcc.target/cris/peep2-xsrand.c
+++ b/gcc/testsuite/gcc.target/cris/peep2-xsrand.c
@@ -1,7 +1,7 @@ 
 /* { dg-do compile } */
 /* { dg-final { scan-assembler "and.w " } } */
 /* { dg-final { scan-assembler "and.b " } } */
-/* { dg-final { scan-assembler-not "and.d" } } */
+/* { dg-final { scan-assembler-not "and.d " } } */
 /* { dg-options "-O2" } */
 
 /* Test the "asrandb", "asrandw", "lsrandb" and "lsrandw" peephole2:s
@@ -10,7 +10,7 @@ 
 unsigned int
 andwlsr (unsigned int x)
 {
-  return (x >> 17) & 0x7ff;
+  return (x >> 17) & 0x7fd;
 }
 
 unsigned int
@@ -22,7 +22,7 @@  andblsr (unsigned int x)
 int
 andwasr (int x)
 {
-  return (x >> 17) & 0x7ff;
+  return (x >> 17) & 0x7fd;
 }
 
 int
diff --git a/gcc/testsuite/gcc.target/cris/peep2-xsrand2.c b/gcc/testsuite/gcc.target/cris/peep2-xsrand2.c
index 5d6ca788d73a..12f26dfb0fc0 100644
--- a/gcc/testsuite/gcc.target/cris/peep2-xsrand2.c
+++ b/gcc/testsuite/gcc.target/cris/peep2-xsrand2.c
@@ -1,9 +1,9 @@ 
 /* { dg-do compile } */
 /* { dg-final { scan-assembler "and.w -137," } } */
-/* { dg-final { scan-assembler "and.b -64," } } */
+/* { dg-final { scan-assembler "and.b -62," } } */
 /* { dg-final { scan-assembler "and.w -139," } } */
 /* { dg-final { scan-assembler "and.b -63," } } */
-/* { dg-final { scan-assembler-not "and.d" } } */
+/* { dg-final { scan-assembler-not "and.d " } } */
 /* { dg-options "-O2" } */
 
 /* PR target/17984.  Test-case based on
@@ -18,7 +18,7 @@  andwlsr (unsigned int x)
 unsigned int
 andblsr (unsigned int x)
 {
-  return (x >> 24) & 0xc0;
+  return (x >> 24) & 0xc2;
 }
 
 int