optabs: Implement double-word ctz and ffs expansion
Checks
Commit Message
Hi!
We have expand_doubleword_clz for a couple of years, where we emit
double-word CLZ as if (high_word == 0) return CLZ (low_word) + word_size;
else return CLZ (high_word);
We can do something similar for CTZ and FFS IMHO, just with the 2
words swapped. So if (low_word == 0) return CTZ (high_word) + word_size;
else return CTZ (low_word); for CTZ and
if (low_word == 0) { return high_word ? FFS (high_word) + word_size : 0;
else return FFS (low_word);
The following patch implements that.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
Note, on some targets which implement both word_mode ctz and ffs patterns,
it might be better to incrementally implement those double-word ffs expansion
patterns in md files, because we aren't able to optimize it correctly;
nothing can detect we have just made sure that argument is not 0 and so
don't need to bother with handling that case. So, on ia32 just using
CTZ patterns would be better there, but I think we can even do better and
instead of doing the comparisons of the operands against 0 do the CTZ
expansion followed by testing of flags.
2023-06-07 Jakub Jelinek <jakub@redhat.com>
* optabs.cc (expand_ffs): Add forward declaration.
(expand_doubleword_clz): Rename to ...
(expand_doubleword_clz_ctz_ffs): ... this. Add UNOPTAB argument,
handle also doubleword CTZ and FFS in addition to CLZ.
(expand_unop): Adjust caller. Also call it for doubleword
ctz_optab and ffs_optab.
* gcc.target/i386/ctzll-1.c: New test.
* gcc.target/i386/ffsll-1.c: New test.
Jakub
Comments
> Am 07.06.2023 um 18:59 schrieb Jakub Jelinek via Gcc-patches <gcc-patches@gcc.gnu.org>:
>
> Hi!
>
> We have expand_doubleword_clz for a couple of years, where we emit
> double-word CLZ as if (high_word == 0) return CLZ (low_word) + word_size;
> else return CLZ (high_word);
> We can do something similar for CTZ and FFS IMHO, just with the 2
> words swapped. So if (low_word == 0) return CTZ (high_word) + word_size;
> else return CTZ (low_word); for CTZ and
> if (low_word == 0) { return high_word ? FFS (high_word) + word_size : 0;
> else return FFS (low_word);
>
> The following patch implements that.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
Ok
Richard
> Note, on some targets which implement both word_mode ctz and ffs patterns,
> it might be better to incrementally implement those double-word ffs expansion
> patterns in md files, because we aren't able to optimize it correctly;
> nothing can detect we have just made sure that argument is not 0 and so
> don't need to bother with handling that case. So, on ia32 just using
> CTZ patterns would be better there, but I think we can even do better and
> instead of doing the comparisons of the operands against 0 do the CTZ
> expansion followed by testing of flags.
>
> 2023-06-07 Jakub Jelinek <jakub@redhat.com>
>
> * optabs.cc (expand_ffs): Add forward declaration.
> (expand_doubleword_clz): Rename to ...
> (expand_doubleword_clz_ctz_ffs): ... this. Add UNOPTAB argument,
> handle also doubleword CTZ and FFS in addition to CLZ.
> (expand_unop): Adjust caller. Also call it for doubleword
> ctz_optab and ffs_optab.
>
> * gcc.target/i386/ctzll-1.c: New test.
> * gcc.target/i386/ffsll-1.c: New test.
>
> --- gcc/optabs.cc.jj 2023-06-07 09:42:14.701130305 +0200
> +++ gcc/optabs.cc 2023-06-07 14:35:04.909879272 +0200
> @@ -2697,10 +2697,14 @@ expand_clrsb_using_clz (scalar_int_mode
> return temp;
> }
>
> -/* Try calculating clz of a double-word quantity as two clz's of word-sized
> - quantities, choosing which based on whether the high word is nonzero. */
> +static rtx expand_ffs (scalar_int_mode, rtx, rtx);
> +
> +/* Try calculating clz, ctz or ffs of a double-word quantity as two clz, ctz or
> + ffs operations on word-sized quantities, choosing which based on whether the
> + high (for clz) or low (for ctz and ffs) word is nonzero. */
> static rtx
> -expand_doubleword_clz (scalar_int_mode mode, rtx op0, rtx target)
> +expand_doubleword_clz_ctz_ffs (scalar_int_mode mode, rtx op0, rtx target,
> + optab unoptab)
> {
> rtx xop0 = force_reg (mode, op0);
> rtx subhi = gen_highpart (word_mode, xop0);
> @@ -2709,6 +2713,7 @@ expand_doubleword_clz (scalar_int_mode m
> rtx_code_label *after_label = gen_label_rtx ();
> rtx_insn *seq;
> rtx temp, result;
> + int addend = 0;
>
> /* If we were not given a target, use a word_mode register, not a
> 'mode' register. The result will fit, and nobody is expecting
> @@ -2721,6 +2726,9 @@ expand_doubleword_clz (scalar_int_mode m
> 'target' to tag a REG_EQUAL note on. */
> result = gen_reg_rtx (word_mode);
>
> + if (unoptab != clz_optab)
> + std::swap (subhi, sublo);
> +
> start_sequence ();
>
> /* If the high word is not equal to zero,
> @@ -2728,7 +2736,13 @@ expand_doubleword_clz (scalar_int_mode m
> emit_cmp_and_jump_insns (subhi, CONST0_RTX (word_mode), EQ, 0,
> word_mode, true, hi0_label);
>
> - temp = expand_unop_direct (word_mode, clz_optab, subhi, result, true);
> + if (optab_handler (unoptab, word_mode) != CODE_FOR_nothing)
> + temp = expand_unop_direct (word_mode, unoptab, subhi, result, true);
> + else
> + {
> + gcc_assert (unoptab == ffs_optab);
> + temp = expand_ffs (word_mode, subhi, result);
> + }
> if (!temp)
> goto fail;
>
> @@ -2739,14 +2753,32 @@ expand_doubleword_clz (scalar_int_mode m
> emit_barrier ();
>
> /* Else clz of the full value is clz of the low word plus the number
> - of bits in the high word. */
> + of bits in the high word. Similarly for ctz/ffs of the high word,
> + except that ffs should be 0 when both words are zero. */
> emit_label (hi0_label);
>
> - temp = expand_unop_direct (word_mode, clz_optab, sublo, 0, true);
> + if (unoptab == ffs_optab)
> + {
> + convert_move (result, const0_rtx, true);
> + emit_cmp_and_jump_insns (sublo, CONST0_RTX (word_mode), EQ, 0,
> + word_mode, true, after_label);
> + }
> +
> + if (optab_handler (unoptab, word_mode) != CODE_FOR_nothing)
> + temp = expand_unop_direct (word_mode, unoptab, sublo, NULL_RTX, true);
> + else
> + {
> + gcc_assert (unoptab == ffs_optab);
> + temp = expand_unop_direct (word_mode, ctz_optab, sublo, NULL_RTX, true);
> + addend = 1;
> + }
> +
> if (!temp)
> goto fail;
> +
> temp = expand_binop (word_mode, add_optab, temp,
> - gen_int_mode (GET_MODE_BITSIZE (word_mode), word_mode),
> + gen_int_mode (GET_MODE_BITSIZE (word_mode) + addend,
> + word_mode),
> result, true, OPTAB_DIRECT);
> if (!temp)
> goto fail;
> @@ -2759,7 +2791,7 @@ expand_doubleword_clz (scalar_int_mode m
> seq = get_insns ();
> end_sequence ();
>
> - add_equal_note (seq, target, CLZ, xop0, NULL_RTX, mode);
> + add_equal_note (seq, target, optab_to_code (unoptab), xop0, NULL_RTX, mode);
> emit_insn (seq);
> return target;
>
> @@ -3252,7 +3284,8 @@ expand_unop (machine_mode mode, optab un
> if (GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD
> && optab_handler (unoptab, word_mode) != CODE_FOR_nothing)
> {
> - temp = expand_doubleword_clz (int_mode, op0, target);
> + temp = expand_doubleword_clz_ctz_ffs (int_mode, op0, target,
> + unoptab);
> if (temp)
> return temp;
> }
> @@ -3499,6 +3532,18 @@ expand_unop (machine_mode mode, optab un
> if (temp)
> return temp;
> }
> +
> + if ((unoptab == ctz_optab || unoptab == ffs_optab)
> + && optimize_insn_for_speed_p ()
> + && is_a <scalar_int_mode> (mode, &int_mode)
> + && GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD
> + && (optab_handler (unoptab, word_mode) != CODE_FOR_nothing
> + || optab_handler (ctz_optab, word_mode) != CODE_FOR_nothing))
> + {
> + temp = expand_doubleword_clz_ctz_ffs (int_mode, op0, target, unoptab);
> + if (temp)
> + return temp;
> + }
>
> try_libcall:
> /* Now try a library call in this mode. */
> --- gcc/testsuite/gcc.target/i386/ctzll-1.c.jj 2023-06-07 14:38:58.749648164 +0200
> +++ gcc/testsuite/gcc.target/i386/ctzll-1.c 2023-06-07 14:41:22.676659439 +0200
> @@ -0,0 +1,9 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { scan-assembler-not "__ctzdi2" } } */
> +
> +int
> +foo (unsigned long long x)
> +{
> + return __builtin_ctzll (x);
> +}
> --- gcc/testsuite/gcc.target/i386/ffsll-1.c.jj 2023-06-07 14:40:00.859789953 +0200
> +++ gcc/testsuite/gcc.target/i386/ffsll-1.c 2023-06-07 14:41:15.104764068 +0200
> @@ -0,0 +1,9 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { scan-assembler-not "__ffsdi2" } } */
> +
> +int
> +foo (unsigned long long x)
> +{
> + return __builtin_ffsll (x);
> +}
>
> Jakub
>
@@ -2697,10 +2697,14 @@ expand_clrsb_using_clz (scalar_int_mode
return temp;
}
-/* Try calculating clz of a double-word quantity as two clz's of word-sized
- quantities, choosing which based on whether the high word is nonzero. */
+static rtx expand_ffs (scalar_int_mode, rtx, rtx);
+
+/* Try calculating clz, ctz or ffs of a double-word quantity as two clz, ctz or
+ ffs operations on word-sized quantities, choosing which based on whether the
+ high (for clz) or low (for ctz and ffs) word is nonzero. */
static rtx
-expand_doubleword_clz (scalar_int_mode mode, rtx op0, rtx target)
+expand_doubleword_clz_ctz_ffs (scalar_int_mode mode, rtx op0, rtx target,
+ optab unoptab)
{
rtx xop0 = force_reg (mode, op0);
rtx subhi = gen_highpart (word_mode, xop0);
@@ -2709,6 +2713,7 @@ expand_doubleword_clz (scalar_int_mode m
rtx_code_label *after_label = gen_label_rtx ();
rtx_insn *seq;
rtx temp, result;
+ int addend = 0;
/* If we were not given a target, use a word_mode register, not a
'mode' register. The result will fit, and nobody is expecting
@@ -2721,6 +2726,9 @@ expand_doubleword_clz (scalar_int_mode m
'target' to tag a REG_EQUAL note on. */
result = gen_reg_rtx (word_mode);
+ if (unoptab != clz_optab)
+ std::swap (subhi, sublo);
+
start_sequence ();
/* If the high word is not equal to zero,
@@ -2728,7 +2736,13 @@ expand_doubleword_clz (scalar_int_mode m
emit_cmp_and_jump_insns (subhi, CONST0_RTX (word_mode), EQ, 0,
word_mode, true, hi0_label);
- temp = expand_unop_direct (word_mode, clz_optab, subhi, result, true);
+ if (optab_handler (unoptab, word_mode) != CODE_FOR_nothing)
+ temp = expand_unop_direct (word_mode, unoptab, subhi, result, true);
+ else
+ {
+ gcc_assert (unoptab == ffs_optab);
+ temp = expand_ffs (word_mode, subhi, result);
+ }
if (!temp)
goto fail;
@@ -2739,14 +2753,32 @@ expand_doubleword_clz (scalar_int_mode m
emit_barrier ();
/* Else clz of the full value is clz of the low word plus the number
- of bits in the high word. */
+ of bits in the high word. Similarly for ctz/ffs of the high word,
+ except that ffs should be 0 when both words are zero. */
emit_label (hi0_label);
- temp = expand_unop_direct (word_mode, clz_optab, sublo, 0, true);
+ if (unoptab == ffs_optab)
+ {
+ convert_move (result, const0_rtx, true);
+ emit_cmp_and_jump_insns (sublo, CONST0_RTX (word_mode), EQ, 0,
+ word_mode, true, after_label);
+ }
+
+ if (optab_handler (unoptab, word_mode) != CODE_FOR_nothing)
+ temp = expand_unop_direct (word_mode, unoptab, sublo, NULL_RTX, true);
+ else
+ {
+ gcc_assert (unoptab == ffs_optab);
+ temp = expand_unop_direct (word_mode, ctz_optab, sublo, NULL_RTX, true);
+ addend = 1;
+ }
+
if (!temp)
goto fail;
+
temp = expand_binop (word_mode, add_optab, temp,
- gen_int_mode (GET_MODE_BITSIZE (word_mode), word_mode),
+ gen_int_mode (GET_MODE_BITSIZE (word_mode) + addend,
+ word_mode),
result, true, OPTAB_DIRECT);
if (!temp)
goto fail;
@@ -2759,7 +2791,7 @@ expand_doubleword_clz (scalar_int_mode m
seq = get_insns ();
end_sequence ();
- add_equal_note (seq, target, CLZ, xop0, NULL_RTX, mode);
+ add_equal_note (seq, target, optab_to_code (unoptab), xop0, NULL_RTX, mode);
emit_insn (seq);
return target;
@@ -3252,7 +3284,8 @@ expand_unop (machine_mode mode, optab un
if (GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD
&& optab_handler (unoptab, word_mode) != CODE_FOR_nothing)
{
- temp = expand_doubleword_clz (int_mode, op0, target);
+ temp = expand_doubleword_clz_ctz_ffs (int_mode, op0, target,
+ unoptab);
if (temp)
return temp;
}
@@ -3499,6 +3532,18 @@ expand_unop (machine_mode mode, optab un
if (temp)
return temp;
}
+
+ if ((unoptab == ctz_optab || unoptab == ffs_optab)
+ && optimize_insn_for_speed_p ()
+ && is_a <scalar_int_mode> (mode, &int_mode)
+ && GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD
+ && (optab_handler (unoptab, word_mode) != CODE_FOR_nothing
+ || optab_handler (ctz_optab, word_mode) != CODE_FOR_nothing))
+ {
+ temp = expand_doubleword_clz_ctz_ffs (int_mode, op0, target, unoptab);
+ if (temp)
+ return temp;
+ }
try_libcall:
/* Now try a library call in this mode. */
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "__ctzdi2" } } */
+
+int
+foo (unsigned long long x)
+{
+ return __builtin_ctzll (x);
+}
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "__ffsdi2" } } */
+
+int
+foo (unsigned long long x)
+{
+ return __builtin_ffsll (x);
+}