i386: Fix up BFmode comparisons in conditional moves [PR107322]
Checks
Commit Message
Hi!
As the testcase shows, when cbranchbf4/cstorebf4 patterns are defined,
we can get ICEs for conditional moves.
The problem is that the generic conditional move expansion just calls
prepare_cmp_insn, which only checks that such a cbranch<mode>4 pattern
exists and returns the comparison unchanged; the expansion then passes it
down to the conditional move optabs.
The following patch fixes it by punting if the comparison isn't an
ix86_fp_comparison_operator (to tell the generic code it should perform
the comparison separately) and by promoting BFmode comparison operands to
SFmode so that the comparison is performed in SFmode.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2022-10-21 Jakub Jelinek <jakub@redhat.com>
PR target/107322
* config/i386/i386-expand.cc (ix86_prepare_fp_compare_args): For
BFmode comparisons promote arguments to SFmode and recurse.
(ix86_expand_int_movcc, ix86_expand_fp_movcc): Return false early
if comparison operands are BFmode and operands[1] is not
ix86_fp_comparison_operator.
* gcc.target/i386/pr107322.c: New test.
Jakub
Comments
On Fri, Oct 21, 2022 at 9:15 AM Jakub Jelinek <jakub@redhat.com> wrote:
>
> Hi!
>
> As the testcase shows, when cbranchbf4/cstorebf4 patterns are defined,
> we can get ICEs for conditional moves.
> The problem is that the generic conditional move expansion just calls
> prepare_cmp_insn which just checks that such a cbranch<mode>4 exists
> and returns directly such comparison and passes it down to the conditional
> move optabs.
> The following patch fixes it by punting if the comparisons aren't
> ix86_fp_comparison_operator (to tell the generic code it should separately
> compare) and to handle the promotion of BFmode comparison operands to
> SFmode such that comparison is performed in SFmode.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2022-10-21 Jakub Jelinek <jakub@redhat.com>
>
> PR target/107322
> * config/i386/i386-expand.cc (ix86_prepare_fp_compare_args): For
> BFmode comparisons promote arguments to SFmode and recurse.
> (ix86_expand_int_movcc, ix86_expand_fp_movcc): Return false early
> if comparison operands are BFmode and operands[1] is not
> ix86_fp_comparison_operator.
>
> * gcc.target/i386/pr107322.c: New test.
OK, but now we have two more copies of a function that effectively
extends BF to SF. Can you please split this utility function out and
use it here and in cbranchbf4/cstorebf4? I'm talking about this part:
+ op = gen_lowpart (HImode, op1);
+ if (CONST_INT_P (op))
+ op = simplify_const_unary_operation (FLOAT_EXTEND, SFmode,
+ op1, BFmode);
+ else
+ {
+ rtx t1 = gen_reg_rtx (SImode);
+ emit_insn (gen_zero_extendhisi2 (t1, op));
+ emit_insn (gen_ashlsi3 (t1, t1, GEN_INT (16)));
+ op = gen_lowpart (SFmode, t1);
+ }
Taking this a bit further, this looks like it could be a generic function
to extend BF to SF, for use when no extendbfsf2 named pattern is defined.
The above could be a follow-up patch, the proposed patch is OK.
On a related note, I still think that without corresponding BFmode
expanders, generic middle-end code should extend BFmode to SFmode and
perform all comparisons in SFmode, in effect what cbranchbf4/cstorebf4
x86 expanders are doing now by themselves. This would allow
cbranchbf4/cstorebf4 to fail (or to not be present), and still result
in optimal code without intermediate extends and truncations.
Thanks,
Uros.
> --- gcc/config/i386/i386-expand.cc.jj 2022-10-19 11:20:54.602879162 +0200
> +++ gcc/config/i386/i386-expand.cc 2022-10-20 12:15:37.750758679 +0200
> @@ -2626,6 +2626,35 @@ ix86_prepare_fp_compare_args (enum rtx_c
> machine_mode op_mode = GET_MODE (op0);
> bool is_sse = SSE_FLOAT_MODE_SSEMATH_OR_HF_P (op_mode);
>
> + if (op_mode == BFmode)
> + {
> + rtx op = gen_lowpart (HImode, op0);
> + if (CONST_INT_P (op))
> + op = simplify_const_unary_operation (FLOAT_EXTEND, SFmode,
> + op0, BFmode);
> + else
> + {
> + rtx t1 = gen_reg_rtx (SImode);
> + emit_insn (gen_zero_extendhisi2 (t1, op));
> + emit_insn (gen_ashlsi3 (t1, t1, GEN_INT (16)));
> + op = gen_lowpart (SFmode, t1);
> + }
> + *pop0 = op;
> + op = gen_lowpart (HImode, op1);
> + if (CONST_INT_P (op))
> + op = simplify_const_unary_operation (FLOAT_EXTEND, SFmode,
> + op1, BFmode);
> + else
> + {
> + rtx t1 = gen_reg_rtx (SImode);
> + emit_insn (gen_zero_extendhisi2 (t1, op));
> + emit_insn (gen_ashlsi3 (t1, t1, GEN_INT (16)));
> + op = gen_lowpart (SFmode, t1);
> + }
> + *pop1 = op;
> + return ix86_prepare_fp_compare_args (code, pop0, pop1);
> + }
> +
> /* All of the unordered compare instructions only work on registers.
> The same is true of the fcomi compare instructions. The XFmode
> compare instructions require registers except when comparing
> @@ -3164,6 +3193,10 @@ ix86_expand_int_movcc (rtx operands[])
> && !TARGET_64BIT))
> return false;
>
> + if (GET_MODE (op0) == BFmode
> + && !ix86_fp_comparison_operator (operands[1], VOIDmode))
> + return false;
> +
> start_sequence ();
> compare_op = ix86_expand_compare (code, op0, op1);
> compare_seq = get_insns ();
> @@ -4238,6 +4271,10 @@ ix86_expand_fp_movcc (rtx operands[])
> rtx op0 = XEXP (operands[1], 0);
> rtx op1 = XEXP (operands[1], 1);
>
> + if (GET_MODE (op0) == BFmode
> + && !ix86_fp_comparison_operator (operands[1], VOIDmode))
> + return false;
> +
> if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
> {
> machine_mode cmode;
> --- gcc/testsuite/gcc.target/i386/pr107322.c.jj 2022-10-20 12:28:46.829983399 +0200
> +++ gcc/testsuite/gcc.target/i386/pr107322.c 2022-10-20 12:29:44.287201650 +0200
> @@ -0,0 +1,33 @@
> +/* PR target/107322 */
> +/* { dg-do compile } */
> +/* { dg-options "-fexcess-precision=16 -O -msse2 -mfpmath=sse" } */
> +
> +int i, j;
> +float k, l;
> +__bf16 f;
> +
> +void
> +foo (void)
> +{
> + i *= 0 >= f;
> +}
> +
> +void
> +bar (void)
> +{
> + i *= 0 <= f;
> +}
> +
> +void
> +baz (int x, int y)
> +{
> + i = 0 >= f ? x : y;
> + j = 0 <= f ? x + 2 : y + 3;
> +}
> +
> +void
> +qux (float x, float y)
> +{
> + k = 0 >= f ? x : y;
> + l = 0 <= f ? x + 2 : y + 3;
> +}
>
> Jakub
>
@@ -2626,6 +2626,35 @@ ix86_prepare_fp_compare_args (enum rtx_c
machine_mode op_mode = GET_MODE (op0);
bool is_sse = SSE_FLOAT_MODE_SSEMATH_OR_HF_P (op_mode);
+ if (op_mode == BFmode)
+ {
+ rtx op = gen_lowpart (HImode, op0);
+ if (CONST_INT_P (op))
+ op = simplify_const_unary_operation (FLOAT_EXTEND, SFmode,
+ op0, BFmode);
+ else
+ {
+ rtx t1 = gen_reg_rtx (SImode);
+ emit_insn (gen_zero_extendhisi2 (t1, op));
+ emit_insn (gen_ashlsi3 (t1, t1, GEN_INT (16)));
+ op = gen_lowpart (SFmode, t1);
+ }
+ *pop0 = op;
+ op = gen_lowpart (HImode, op1);
+ if (CONST_INT_P (op))
+ op = simplify_const_unary_operation (FLOAT_EXTEND, SFmode,
+ op1, BFmode);
+ else
+ {
+ rtx t1 = gen_reg_rtx (SImode);
+ emit_insn (gen_zero_extendhisi2 (t1, op));
+ emit_insn (gen_ashlsi3 (t1, t1, GEN_INT (16)));
+ op = gen_lowpart (SFmode, t1);
+ }
+ *pop1 = op;
+ return ix86_prepare_fp_compare_args (code, pop0, pop1);
+ }
+
/* All of the unordered compare instructions only work on registers.
The same is true of the fcomi compare instructions. The XFmode
compare instructions require registers except when comparing
@@ -3164,6 +3193,10 @@ ix86_expand_int_movcc (rtx operands[])
&& !TARGET_64BIT))
return false;
+ if (GET_MODE (op0) == BFmode
+ && !ix86_fp_comparison_operator (operands[1], VOIDmode))
+ return false;
+
start_sequence ();
compare_op = ix86_expand_compare (code, op0, op1);
compare_seq = get_insns ();
@@ -4238,6 +4271,10 @@ ix86_expand_fp_movcc (rtx operands[])
rtx op0 = XEXP (operands[1], 0);
rtx op1 = XEXP (operands[1], 1);
+ if (GET_MODE (op0) == BFmode
+ && !ix86_fp_comparison_operator (operands[1], VOIDmode))
+ return false;
+
if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
{
machine_mode cmode;
@@ -0,0 +1,33 @@
+/* PR target/107322 */
+/* { dg-do compile } */
+/* { dg-options "-fexcess-precision=16 -O -msse2 -mfpmath=sse" } */
+
+int i, j;
+float k, l;
+__bf16 f;
+
+void
+foo (void)
+{
+ i *= 0 >= f;
+}
+
+void
+bar (void)
+{
+ i *= 0 <= f;
+}
+
+void
+baz (int x, int y)
+{
+ i = 0 >= f ? x : y;
+ j = 0 <= f ? x + 2 : y + 3;
+}
+
+void
+qux (float x, float y)
+{
+ k = 0 >= f ? x : y;
+ l = 0 <= f ? x + 2 : y + 3;
+}