optabs: Implement double-word ctz and ffs expansion

Message ID ZIC3TReXs9CBKEbz@tucnak
State Unresolved
Headers
Series optabs: Implement double-word ctz and ffs expansion |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

Jakub Jelinek June 7, 2023, 4:58 p.m. UTC
  Hi!

We have expand_doubleword_clz for a couple of years, where we emit
double-word CLZ as if (high_word == 0) return CLZ (low_word) + word_size;
else return CLZ (high_word);
We can do something similar for CTZ and FFS IMHO, just with the 2
words swapped.  So if (low_word == 0) return CTZ (high_word) + word_size;
else return CTZ (low_word); for CTZ and
if (low_word == 0) { return high_word ? FFS (high_word) + word_size : 0;
else return FFS (low_word);

The following patch implements that.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Note, on some targets which implement both word_mode ctz and ffs patterns,
it might be better to incrementally implement those double-word ffs expansion
patterns in md files, because we aren't able to optimize it correctly;
nothing can detect we have just made sure that argument is not 0 and so
don't need to bother with handling that case.  So, on ia32 just using
CTZ patterns would be better there, but I think we can even do better and
instead of doing the comparisons of the operands against 0 do the CTZ
expansion followed by testing of flags.

2023-06-07  Jakub Jelinek  <jakub@redhat.com>

	* optabs.cc (expand_ffs): Add forward declaration.
	(expand_doubleword_clz): Rename to ...
	(expand_doubleword_clz_ctz_ffs): ... this.  Add UNOPTAB argument,
	handle also doubleword CTZ and FFS in addition to CLZ.
	(expand_unop): Adjust caller.  Also call it for doubleword
	ctz_optab and ffs_optab.

	* gcc.target/i386/ctzll-1.c: New test.
	* gcc.target/i386/ffsll-1.c: New test.


	Jakub
  

Comments

Richard Biener June 8, 2023, 5:55 a.m. UTC | #1
> Am 07.06.2023 um 18:59 schrieb Jakub Jelinek via Gcc-patches <gcc-patches@gcc.gnu.org>:
> 
> Hi!
> 
> We have expand_doubleword_clz for a couple of years, where we emit
> double-word CLZ as if (high_word == 0) return CLZ (low_word) + word_size;
> else return CLZ (high_word);
> We can do something similar for CTZ and FFS IMHO, just with the 2
> words swapped.  So if (low_word == 0) return CTZ (high_word) + word_size;
> else return CTZ (low_word); for CTZ and
> if (low_word == 0) { return high_word ? FFS (high_word) + word_size : 0;
> else return FFS (low_word);
> 
> The following patch implements that.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok

Richard 
> Note, on some targets which implement both word_mode ctz and ffs patterns,
> it might be better to incrementally implement those double-word ffs expansion
> patterns in md files, because we aren't able to optimize it correctly;
> nothing can detect we have just made sure that argument is not 0 and so
> don't need to bother with handling that case.  So, on ia32 just using
> CTZ patterns would be better there, but I think we can even do better and
> instead of doing the comparisons of the operands against 0 do the CTZ
> expansion followed by testing of flags.
> 
> 2023-06-07  Jakub Jelinek  <jakub@redhat.com>
> 
>    * optabs.cc (expand_ffs): Add forward declaration.
>    (expand_doubleword_clz): Rename to ...
>    (expand_doubleword_clz_ctz_ffs): ... this.  Add UNOPTAB argument,
>    handle also doubleword CTZ and FFS in addition to CLZ.
>    (expand_unop): Adjust caller.  Also call it for doubleword
>    ctz_optab and ffs_optab.
> 
>    * gcc.target/i386/ctzll-1.c: New test.
>    * gcc.target/i386/ffsll-1.c: New test.
> 
> --- gcc/optabs.cc.jj    2023-06-07 09:42:14.701130305 +0200
> +++ gcc/optabs.cc    2023-06-07 14:35:04.909879272 +0200
> @@ -2697,10 +2697,14 @@ expand_clrsb_using_clz (scalar_int_mode
>   return temp;
> }
> 
> -/* Try calculating clz of a double-word quantity as two clz's of word-sized
> -   quantities, choosing which based on whether the high word is nonzero.  */
> +static rtx expand_ffs (scalar_int_mode, rtx, rtx);
> +
> +/* Try calculating clz, ctz or ffs of a double-word quantity as two clz, ctz or
> +   ffs operations on word-sized quantities, choosing which based on whether the
> +   high (for clz) or low (for ctz and ffs) word is nonzero.  */
> static rtx
> -expand_doubleword_clz (scalar_int_mode mode, rtx op0, rtx target)
> +expand_doubleword_clz_ctz_ffs (scalar_int_mode mode, rtx op0, rtx target,
> +                   optab unoptab)
> {
>   rtx xop0 = force_reg (mode, op0);
>   rtx subhi = gen_highpart (word_mode, xop0);
> @@ -2709,6 +2713,7 @@ expand_doubleword_clz (scalar_int_mode m
>   rtx_code_label *after_label = gen_label_rtx ();
>   rtx_insn *seq;
>   rtx temp, result;
> +  int addend = 0;
> 
>   /* If we were not given a target, use a word_mode register, not a
>      'mode' register.  The result will fit, and nobody is expecting
> @@ -2721,6 +2726,9 @@ expand_doubleword_clz (scalar_int_mode m
>      'target' to tag a REG_EQUAL note on.  */
>   result = gen_reg_rtx (word_mode);
> 
> +  if (unoptab != clz_optab)
> +    std::swap (subhi, sublo);
> +
>   start_sequence ();
> 
>   /* If the high word is not equal to zero,
> @@ -2728,7 +2736,13 @@ expand_doubleword_clz (scalar_int_mode m
>   emit_cmp_and_jump_insns (subhi, CONST0_RTX (word_mode), EQ, 0,
>               word_mode, true, hi0_label);
> 
> -  temp = expand_unop_direct (word_mode, clz_optab, subhi, result, true);
> +  if (optab_handler (unoptab, word_mode) != CODE_FOR_nothing)
> +    temp = expand_unop_direct (word_mode, unoptab, subhi, result, true);
> +  else
> +    {
> +      gcc_assert (unoptab == ffs_optab);
> +      temp = expand_ffs (word_mode, subhi, result);
> +    }
>   if (!temp)
>     goto fail;
> 
> @@ -2739,14 +2753,32 @@ expand_doubleword_clz (scalar_int_mode m
>   emit_barrier ();
> 
>   /* Else clz of the full value is clz of the low word plus the number
> -     of bits in the high word.  */
> +     of bits in the high word.  Similarly for ctz/ffs of the high word,
> +     except that ffs should be 0 when both words are zero.  */
>   emit_label (hi0_label);
> 
> -  temp = expand_unop_direct (word_mode, clz_optab, sublo, 0, true);
> +  if (unoptab == ffs_optab)
> +    {
> +      convert_move (result, const0_rtx, true);
> +      emit_cmp_and_jump_insns (sublo, CONST0_RTX (word_mode), EQ, 0,
> +                   word_mode, true, after_label);
> +    }
> +
> +  if (optab_handler (unoptab, word_mode) != CODE_FOR_nothing)
> +    temp = expand_unop_direct (word_mode, unoptab, sublo, NULL_RTX, true);
> +  else
> +    {
> +      gcc_assert (unoptab == ffs_optab);
> +      temp = expand_unop_direct (word_mode, ctz_optab, sublo, NULL_RTX, true);
> +      addend = 1;
> +    }
> +
>   if (!temp)
>     goto fail;
> +
>   temp = expand_binop (word_mode, add_optab, temp,
> -               gen_int_mode (GET_MODE_BITSIZE (word_mode), word_mode),
> +               gen_int_mode (GET_MODE_BITSIZE (word_mode) + addend,
> +                     word_mode),
>               result, true, OPTAB_DIRECT);
>   if (!temp)
>     goto fail;
> @@ -2759,7 +2791,7 @@ expand_doubleword_clz (scalar_int_mode m
>   seq = get_insns ();
>   end_sequence ();
> 
> -  add_equal_note (seq, target, CLZ, xop0, NULL_RTX, mode);
> +  add_equal_note (seq, target, optab_to_code (unoptab), xop0, NULL_RTX, mode);
>   emit_insn (seq);
>   return target;
> 
> @@ -3252,7 +3284,8 @@ expand_unop (machine_mode mode, optab un
>      if (GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD
>          && optab_handler (unoptab, word_mode) != CODE_FOR_nothing)
>        {
> -          temp = expand_doubleword_clz (int_mode, op0, target);
> +          temp = expand_doubleword_clz_ctz_ffs (int_mode, op0, target,
> +                            unoptab);
>          if (temp)
>        return temp;
>        }
> @@ -3499,6 +3532,18 @@ expand_unop (machine_mode mode, optab un
>       if (temp)
>    return temp;
>     }
> +
> +  if ((unoptab == ctz_optab || unoptab == ffs_optab)
> +      && optimize_insn_for_speed_p ()
> +      && is_a <scalar_int_mode> (mode, &int_mode)
> +      && GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD
> +      && (optab_handler (unoptab, word_mode) != CODE_FOR_nothing
> +      || optab_handler (ctz_optab, word_mode) != CODE_FOR_nothing))
> +    {
> +      temp = expand_doubleword_clz_ctz_ffs (int_mode, op0, target, unoptab);
> +      if (temp)
> +    return temp;
> +    }
> 
>  try_libcall:
>   /* Now try a library call in this mode.  */
> --- gcc/testsuite/gcc.target/i386/ctzll-1.c.jj    2023-06-07 14:38:58.749648164 +0200
> +++ gcc/testsuite/gcc.target/i386/ctzll-1.c    2023-06-07 14:41:22.676659439 +0200
> @@ -0,0 +1,9 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { scan-assembler-not "__ctzdi2" } } */
> +
> +int
> +foo (unsigned long long x)
> +{
> +  return __builtin_ctzll (x);
> +}
> --- gcc/testsuite/gcc.target/i386/ffsll-1.c.jj    2023-06-07 14:40:00.859789953 +0200
> +++ gcc/testsuite/gcc.target/i386/ffsll-1.c    2023-06-07 14:41:15.104764068 +0200
> @@ -0,0 +1,9 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { scan-assembler-not "__ffsdi2" } } */
> +
> +int
> +foo (unsigned long long x)
> +{
> +  return __builtin_ffsll (x);
> +}
> 
>    Jakub
>
  

Patch

--- gcc/optabs.cc.jj	2023-06-07 09:42:14.701130305 +0200
+++ gcc/optabs.cc	2023-06-07 14:35:04.909879272 +0200
@@ -2697,10 +2697,14 @@  expand_clrsb_using_clz (scalar_int_mode
   return temp;
 }
 
-/* Try calculating clz of a double-word quantity as two clz's of word-sized
-   quantities, choosing which based on whether the high word is nonzero.  */
+static rtx expand_ffs (scalar_int_mode, rtx, rtx);
+
+/* Try calculating clz, ctz or ffs of a double-word quantity as two clz, ctz or
+   ffs operations on word-sized quantities, choosing which based on whether the
+   high (for clz) or low (for ctz and ffs) word is nonzero.  */
 static rtx
-expand_doubleword_clz (scalar_int_mode mode, rtx op0, rtx target)
+expand_doubleword_clz_ctz_ffs (scalar_int_mode mode, rtx op0, rtx target,
+			       optab unoptab)
 {
   rtx xop0 = force_reg (mode, op0);
   rtx subhi = gen_highpart (word_mode, xop0);
@@ -2709,6 +2713,7 @@  expand_doubleword_clz (scalar_int_mode m
   rtx_code_label *after_label = gen_label_rtx ();
   rtx_insn *seq;
   rtx temp, result;
+  int addend = 0;
 
   /* If we were not given a target, use a word_mode register, not a
      'mode' register.  The result will fit, and nobody is expecting
@@ -2721,6 +2726,9 @@  expand_doubleword_clz (scalar_int_mode m
      'target' to tag a REG_EQUAL note on.  */
   result = gen_reg_rtx (word_mode);
 
+  if (unoptab != clz_optab)
+    std::swap (subhi, sublo);
+
   start_sequence ();
 
   /* If the high word is not equal to zero,
@@ -2728,7 +2736,13 @@  expand_doubleword_clz (scalar_int_mode m
   emit_cmp_and_jump_insns (subhi, CONST0_RTX (word_mode), EQ, 0,
 			   word_mode, true, hi0_label);
 
-  temp = expand_unop_direct (word_mode, clz_optab, subhi, result, true);
+  if (optab_handler (unoptab, word_mode) != CODE_FOR_nothing)
+    temp = expand_unop_direct (word_mode, unoptab, subhi, result, true);
+  else
+    {
+      gcc_assert (unoptab == ffs_optab);
+      temp = expand_ffs (word_mode, subhi, result);
+    }
   if (!temp)
     goto fail;
 
@@ -2739,14 +2753,32 @@  expand_doubleword_clz (scalar_int_mode m
   emit_barrier ();
 
   /* Else clz of the full value is clz of the low word plus the number
-     of bits in the high word.  */
+     of bits in the high word.  Similarly for ctz/ffs of the high word,
+     except that ffs should be 0 when both words are zero.  */
   emit_label (hi0_label);
 
-  temp = expand_unop_direct (word_mode, clz_optab, sublo, 0, true);
+  if (unoptab == ffs_optab)
+    {
+      convert_move (result, const0_rtx, true);
+      emit_cmp_and_jump_insns (sublo, CONST0_RTX (word_mode), EQ, 0,
+			       word_mode, true, after_label);
+    }
+
+  if (optab_handler (unoptab, word_mode) != CODE_FOR_nothing)
+    temp = expand_unop_direct (word_mode, unoptab, sublo, NULL_RTX, true);
+  else
+    {
+      gcc_assert (unoptab == ffs_optab);
+      temp = expand_unop_direct (word_mode, ctz_optab, sublo, NULL_RTX, true);
+      addend = 1;
+    }
+
   if (!temp)
     goto fail;
+
   temp = expand_binop (word_mode, add_optab, temp,
-		       gen_int_mode (GET_MODE_BITSIZE (word_mode), word_mode),
+		       gen_int_mode (GET_MODE_BITSIZE (word_mode) + addend,
+				     word_mode),
 		       result, true, OPTAB_DIRECT);
   if (!temp)
     goto fail;
@@ -2759,7 +2791,7 @@  expand_doubleword_clz (scalar_int_mode m
   seq = get_insns ();
   end_sequence ();
 
-  add_equal_note (seq, target, CLZ, xop0, NULL_RTX, mode);
+  add_equal_note (seq, target, optab_to_code (unoptab), xop0, NULL_RTX, mode);
   emit_insn (seq);
   return target;
 
@@ -3252,7 +3284,8 @@  expand_unop (machine_mode mode, optab un
 	  if (GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD
 	      && optab_handler (unoptab, word_mode) != CODE_FOR_nothing)
 	    {
-	      temp = expand_doubleword_clz (int_mode, op0, target);
+	      temp = expand_doubleword_clz_ctz_ffs (int_mode, op0, target,
+						    unoptab);
 	      if (temp)
 		return temp;
 	    }
@@ -3499,6 +3532,18 @@  expand_unop (machine_mode mode, optab un
       if (temp)
 	return temp;
     }
+
+  if ((unoptab == ctz_optab || unoptab == ffs_optab)
+      && optimize_insn_for_speed_p ()
+      && is_a <scalar_int_mode> (mode, &int_mode)
+      && GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD
+      && (optab_handler (unoptab, word_mode) != CODE_FOR_nothing
+	  || optab_handler (ctz_optab, word_mode) != CODE_FOR_nothing))
+    {
+      temp = expand_doubleword_clz_ctz_ffs (int_mode, op0, target, unoptab);
+      if (temp)
+	return temp;
+    }
 
  try_libcall:
   /* Now try a library call in this mode.  */
--- gcc/testsuite/gcc.target/i386/ctzll-1.c.jj	2023-06-07 14:38:58.749648164 +0200
+++ gcc/testsuite/gcc.target/i386/ctzll-1.c	2023-06-07 14:41:22.676659439 +0200
@@ -0,0 +1,9 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "__ctzdi2" } } */
+
+int
+foo (unsigned long long x)
+{
+  return __builtin_ctzll (x);
+}
--- gcc/testsuite/gcc.target/i386/ffsll-1.c.jj	2023-06-07 14:40:00.859789953 +0200
+++ gcc/testsuite/gcc.target/i386/ffsll-1.c	2023-06-07 14:41:15.104764068 +0200
@@ -0,0 +1,9 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "__ffsdi2" } } */
+
+int
+foo (unsigned long long x)
+{
+  return __builtin_ffsll (x);
+}