[take,#3] match.pd: Simplify popcount/parity of bswap/rotate.

Message ID 000a01d9834e$2a7811e0$7f6835a0$@nextmovesoftware.com
State Accepted
Headers
Series [take,#3] match.pd: Simplify popcount/parity of bswap/rotate. |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

Roger Sayle May 10, 2023, 2:46 p.m. UTC
  This is the latest iteration of my patch from August 2020
https://gcc.gnu.org/pipermail/gcc-patches/2020-August/552391.html
incorporating feedback and suggestions from reviewers.

This patch to match.pd optimizes away bit permutation operations,
specifically bswap and rotate, in calls to popcount and parity.

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}
with no new failures.  Ok for mainline?


2023-05-10  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
	* match.pd <popcount optimizations>: Simplify popcount(bswap(x))
	as popcount(x).  Simplify popcount(rotate(x,y)) as popcount(x).
	<parity optimizations>:  Simplify parity(bswap(x)) as parity(x).
	Simplify parity(rotate(x,y)) as parity(x).

gcc/testsuite/ChangeLog
	* gcc.dg/fold-parity-6.c: New test.
	* gcc.dg/fold-parity-7.c: New test.
	* gcc.dg/fold-popcount-6.c: New test.
	* gcc.dg/fold-popcount-7.c: New test.


Thanks again,
Roger
--
  

Comments

Bernhard Reutner-Fischer May 10, 2023, 3:47 p.m. UTC | #1
Hi Roger!
On 10 May 2023 16:46:10 CEST, Roger Sayle <roger@nextmovesoftware.com> wrote:

Just a nit:

+/* { dg-final { scan-tree-dump-times "bswap" 0 "optimized" } } */

Can you please use scan-tree-dump-not instead?
thanks,
  
Jeff Law May 10, 2023, 11:15 p.m. UTC | #2
On 5/10/23 09:47, Bernhard Reutner-Fischer via Gcc-patches wrote:
> Hi Roger!
> On 10 May 2023 16:46:10 CEST, Roger Sayle <roger@nextmovesoftware.com> wrote:
> 
> Just a nit:
> 
> +/* { dg-final { scan-tree-dump-times "bswap" 0 "optimized" } } */
> 
> Can you please use scan-tree-dump-not instead?
OK with that change.  I considered asking for the patterns to be merged, 
but I think it's easier to read with them separate.

jeff
  

Patch

diff --git a/gcc/match.pd b/gcc/match.pd
index ceae1c3..bc083be 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -7766,6 +7766,32 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
       (cmp (popcount @0) integer_zerop)
       (rep @0 { build_zero_cst (TREE_TYPE (@0)); }))))
 
+/* popcount(bswap(x)) is popcount(x).  */
+(for popcount (POPCOUNT)
+  (for bswap (BUILT_IN_BSWAP16 BUILT_IN_BSWAP32
+	      BUILT_IN_BSWAP64 BUILT_IN_BSWAP128)
+    (simplify
+      (popcount (convert?@0 (bswap:s@1 @2)))
+      (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+	   && INTEGRAL_TYPE_P (TREE_TYPE (@1)))
+	(with { unsigned int prec0 = TYPE_PRECISION (TREE_TYPE (@0));
+		unsigned int prec1 = TYPE_PRECISION (TREE_TYPE (@1)); }
+	  (if (prec0 == prec1 || (prec0 > prec1 && TYPE_UNSIGNED (@1)))
+	    (popcount @2)))))))
+
+/* popcount(rotate(X Y)) is popcount(X).  */
+(for popcount (POPCOUNT)
+  (for rot (lrotate rrotate)
+    (simplify
+      (popcount (convert?@0 (rot:s@1 @2 @3)))
+      (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+	   && INTEGRAL_TYPE_P (TREE_TYPE (@1))	
+	   && (GIMPLE || !TREE_SIDE_EFFECTS (@3)))
+	(with { unsigned int prec0 = TYPE_PRECISION (TREE_TYPE (@0));
+		unsigned int prec1 = TYPE_PRECISION (TREE_TYPE (@1)); }
+	  (if (prec0 == prec1 || (prec0 > prec1 && TYPE_UNSIGNED (@1)))
+	    (popcount @2)))))))
+
 /* Canonicalize POPCOUNT(x)&1 as PARITY(X).  */
 (simplify
   (bit_and (POPCOUNT @0) integer_onep)
@@ -7777,6 +7803,30 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (PARITY (bit_not @0))
   (PARITY @0))
 
+/* parity(bswap(x)) is parity(x).  */
+(for parity (PARITY)
+  (for bswap (BUILT_IN_BSWAP16 BUILT_IN_BSWAP32
+	      BUILT_IN_BSWAP64 BUILT_IN_BSWAP128)
+    (simplify
+      (parity (convert?@0 (bswap:s@1 @2)))
+      (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+	   && INTEGRAL_TYPE_P (TREE_TYPE (@1))
+	   && TYPE_PRECISION (TREE_TYPE (@0))
+	      >= TYPE_PRECISION (TREE_TYPE (@1)))
+	(parity @2)))))
+
+/* parity(rotate(X Y)) is parity(X).  */
+(for parity (PARITY)
+  (for rot (lrotate rrotate)
+    (simplify
+      (parity (convert?@0 (rot:s@1 @2 @3)))
+      (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+	   && INTEGRAL_TYPE_P (TREE_TYPE (@1))
+	   && (GIMPLE || !TREE_SIDE_EFFECTS (@3))
+	   && TYPE_PRECISION (TREE_TYPE (@0))
+	      >= TYPE_PRECISION (TREE_TYPE (@1)))
+	(parity @2)))))
+
 /* parity(X)^parity(Y) is parity(X^Y).  */
 (simplify
   (bit_xor (PARITY:s @0) (PARITY:s @1))
diff --git a/gcc/testsuite/gcc.dg/fold-parity-6.c b/gcc/testsuite/gcc.dg/fold-parity-6.c
new file mode 100644
index 0000000..623afb9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-parity-6.c
@@ -0,0 +1,37 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int foo(unsigned int x)
+{
+#if __SIZEOF_INT__ == 4
+  return __builtin_parity (__builtin_bswap32(x));
+#elif __SIZEOF_INT__ == 2
+  return __builtin_parity (__builtin_bswap16(x));
+#else
+  return x;
+#endif
+}
+
+int bar(unsigned long x)
+{
+#if __SIZEOF_LONG__ == 8
+  return __builtin_parityl (__builtin_bswap64(x));
+#elif __SIZEOF_LONG__ == 4
+  return __builtin_parityl (__builtin_bswap32(x));
+#else
+  return x;
+#endif
+}
+
+int baz(unsigned long long x)
+{
+#if __SIZEOF_LONG_LONG__ == 8
+  return __builtin_parityll (__builtin_bswap64(x));
+#elif __SIZEOF_LONG_LONG__ == 4
+  return __builtin_parityll (__builtin_bswap32(x));
+#else
+  return x;
+#endif
+}
+
+/* { dg-final { scan-tree-dump-times "bswap" 0 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/fold-parity-7.c b/gcc/testsuite/gcc.dg/fold-parity-7.c
new file mode 100644
index 0000000..c08cdee
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-parity-7.c
@@ -0,0 +1,43 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int foo(unsigned int x)
+{
+#if __SIZEOF_INT__ == 4
+  unsigned int y = (x>>4) | (x<<28);
+  return __builtin_parity(y);
+#elif __SIZEOF_INT__ == 2
+  unsigned int y = (x>>4) | (x<<12);
+  return __builtin_parity(y);
+#else
+  return x;
+#endif
+}
+
+int bar(unsigned long x)
+{
+#if __SIZEOF_LONG__ == 8
+  unsigned long y = (x>>4) | (x<<60);
+  return __builtin_parityl (y);
+#elif __SIZEOF_LONG__ == 4
+  unsigned long y = (x>>4) | (x<<28);
+  return __builtin_parityl (y);
+#else
+  return x;
+#endif
+}
+
+int baz(unsigned long long x)
+{
+#if __SIZEOF_LONG_LONG__ == 8
+  unsigned long long y = (x>>4) | (x<<60);
+  return __builtin_parityll (y);
+#elif __SIZEOF_LONG_LONG__ == 4
+  unsigned long long y = (x>>4) | (x<<28);
+  return __builtin_parityll (y);
+#else
+  return x;
+#endif
+}
+
+/* { dg-final { scan-tree-dump-times " r>> " 0 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/fold-popcount-6.c b/gcc/testsuite/gcc.dg/fold-popcount-6.c
new file mode 100644
index 0000000..e7f38d0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-popcount-6.c
@@ -0,0 +1,37 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int foo(unsigned int x)
+{
+#if __SIZEOF_INT__ == 4
+  return __builtin_popcount (__builtin_bswap32(x));
+#elif __SIZEOF_INT__ == 2
+  return __builtin_popcount (__builtin_bswap16(x));
+#else
+  return x;
+#endif
+}
+
+int bar(unsigned long x)
+{
+#if __SIZEOF_LONG__ == 8
+  return __builtin_popcountl (__builtin_bswap64(x));
+#elif __SIZEOF_LONG__ == 4
+  return __builtin_popcountl (__builtin_bswap32(x));
+#else
+  return x;
+#endif
+}
+
+int baz(unsigned long long x)
+{
+#if __SIZEOF_LONG_LONG__ == 8
+  return __builtin_popcountll (__builtin_bswap64(x));
+#elif __SIZEOF_LONG_LONG__ == 4
+  return __builtin_popcountll (__builtin_bswap32(x));
+#else
+  return x;
+#endif
+}
+
+/* { dg-final { scan-tree-dump-times "bswap" 0 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/fold-popcount-7.c b/gcc/testsuite/gcc.dg/fold-popcount-7.c
new file mode 100644
index 0000000..dcfdf5f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-popcount-7.c
@@ -0,0 +1,43 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int foo(unsigned int x)
+{
+#if __SIZEOF_INT__ == 4
+  unsigned int y = (x>>4) | (x<<28);
+  return __builtin_popcount(y);
+#elif __SIZEOF_INT__ == 2
+  unsigned int y = (x>>4) | (x<<12);
+  return __builtin_popcount(y);
+#else
+  return x;
+#endif
+}
+
+int bar(unsigned long x)
+{
+#if __SIZEOF_LONG__ == 8
+  unsigned long y = (x>>4) | (x<<60);
+  return __builtin_popcountl (y);
+#elif __SIZEOF_LONG__ == 4
+  unsigned long y = (x>>4) | (x<<28);
+  return __builtin_popcountl (y);
+#else
+  return x;
+#endif
+}
+
+int baz(unsigned long long x)
+{
+#if __SIZEOF_LONG_LONG__ == 8
+  unsigned long long y = (x>>4) | (x<<60);
+  return __builtin_popcountll (y);
+#elif __SIZEOF_LONG_LONG__ == 4
+  unsigned long long y = (x>>4) | (x<<28);
+  return __builtin_popcountll (y);
+#else
+  return x;
+#endif
+}
+
+/* { dg-final { scan-tree-dump-times " r>> " 0 "optimized" } } */