Take the following case for example.
CFLAGS: -march=rv64gc_zbb_zicond -mabi=lp64d -O2
long
test_AND_ceqz (long x, long y, long z, long c)
{
if (c)
x = y & z;
else
x = y;
return x;
}
Before patch:
and a2,a1,a2
czero.eqz a0,a2,a3
czero.nez a3,a1,a3
or a0,a3,a0
ret
After patch:
and a0,a1,a2
czero.nez a1,a1,a3
or a0,a1,a0
ret
Co-authored-by: Xiao Zeng<zengxiao@eswincomputing.com>
gcc/ChangeLog:
* ifcvt.cc (noce_cond_zero_binary_op_supported): Add support for AND.
(noce_bbs_ok_for_cond_zero_arith): Likewise.
(noce_try_cond_zero_arith): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/zicond_ifcvt_opt.c: add TCs for AND.
---
gcc/ifcvt.cc | 69 ++++++--
.../gcc.target/riscv/zicond_ifcvt_opt.c | 163 +++++++++++++++++-
2 files changed, 211 insertions(+), 21 deletions(-)
On 12/5/23 01:12, Fei Gao wrote:
> Take the following case for example.
>
> CFLAGS: -march=rv64gc_zbb_zicond -mabi=lp64d -O2
>
> long
> test_AND_ceqz (long x, long y, long z, long c)
> {
> if (c)
> x = y & z;
> else
> x = y;
> return x;
> }
>
> Before patch:
>
> and a2,a1,a2
> czero.eqz a0,a2,a3
> czero.nez a3,a1,a3
> or a0,a3,a0
> ret
>
> After patch:
> and a0,a1,a2
> czero.nez a1,a1,a3
> or a0,a1,a0
FWIW I was having a conversation with Andrew W. a little while ago and
his preference was to change the IOR into an ADD. We have a better
chance of using a compressed variant as c.add allows the full set of
registers while c.or only allows a subset of registers. Given one of
the two input registers in that IOR will always have the value zero, it
should be equivalent.
Given this is target independent code we have to be careful, but I'm not
immediately aware of a target where using ADD would be worse than IOR
here. Anyway, feel free to submit a patch for that minor change, it'll
need to queue for gcc-15, but should be non-controversial at that time.
The same trick can be done in the conditional move expander within riscv.cc.
>
> Co-authored-by: Xiao Zeng<zengxiao@eswincomputing.com>
>
> gcc/ChangeLog:
>
> * ifcvt.cc (noce_cond_zero_binary_op_supported): Add support for AND.
> (noce_bbs_ok_for_cond_zero_arith): Likewise.
> (noce_try_cond_zero_arith): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/zicond_ifcvt_opt.c: add TCs for AND.
This looks fine. I'll need to adjust the matches in the test file
because we're not supporting SUBREGs. I can do that easy enough.
I 3-stage bootstrapped and regression tested this on x86_64 as well as
regression tested it in rv64gc.
I'll push it to the trunk shortly.
jeff
@@ -2922,7 +2922,7 @@ noce_cond_zero_binary_op_supported (rtx op)
if (opcode == PLUS || opcode == MINUS || opcode == IOR || opcode == XOR
|| opcode == ASHIFT || opcode == ASHIFTRT || opcode == LSHIFTRT
- || opcode == ROTATE || opcode == ROTATERT)
+ || opcode == ROTATE || opcode == ROTATERT || opcode == AND)
return true;
return false;
@@ -2954,6 +2954,7 @@ get_base_reg (rtx exp)
static bool
noce_bbs_ok_for_cond_zero_arith (struct noce_if_info *if_info, rtx *common_ptr,
+ rtx *bin_exp_ptr,
enum rtx_code *czero_code_ptr, rtx *a_ptr,
rtx **to_replace)
{
@@ -2998,7 +2999,7 @@ noce_bbs_ok_for_cond_zero_arith (struct noce_if_info *if_info, rtx *common_ptr,
{
common = b;
bin_op1 = XEXP (bin_exp, 1);
- czero_code = reverse
+ czero_code = (reverse ^ (GET_CODE (bin_exp) == AND))
? noce_reversed_cond_code (if_info)
: GET_CODE (cond);
}
@@ -3016,6 +3017,7 @@ noce_bbs_ok_for_cond_zero_arith (struct noce_if_info *if_info, rtx *common_ptr,
return false;
*common_ptr = common;
+ *bin_exp_ptr = bin_exp;
*czero_code_ptr = czero_code;
*a_ptr = a;
@@ -3029,38 +3031,67 @@ noce_bbs_ok_for_cond_zero_arith (struct noce_if_info *if_info, rtx *common_ptr,
static int
noce_try_cond_zero_arith (struct noce_if_info *if_info)
{
- rtx target, a;
+ rtx target, rtmp, a;
rtx_insn *seq;
machine_mode mode = GET_MODE (if_info->x);
rtx common = NULL_RTX;
enum rtx_code czero_code = UNKNOWN;
+ rtx bin_exp = NULL_RTX;
+ enum rtx_code bin_code = UNKNOWN;
rtx non_zero_op = NULL_RTX;
rtx *to_replace = NULL;
- if (!noce_bbs_ok_for_cond_zero_arith (if_info, &common, &czero_code, &a,
- &to_replace))
+ if (!noce_bbs_ok_for_cond_zero_arith (if_info, &common, &bin_exp, &czero_code,
+ &a, &to_replace))
return false;
- non_zero_op = *to_replace;
-
start_sequence ();
- /* If x is used in both input and out like x = c ? x + z : x,
- use a new reg to avoid modifying x */
- if (common && rtx_equal_p (common, if_info->x))
- target = gen_reg_rtx (mode);
- else
- target = if_info->x;
+ bin_code = GET_CODE (bin_exp);
- target = noce_emit_czero (if_info, czero_code, non_zero_op, target);
- if (!target || !to_replace)
+ if (bin_code == AND)
{
- end_sequence ();
- return false;
+ rtmp = gen_reg_rtx (mode);
+ noce_emit_move_insn (rtmp, a);
+
+ target = noce_emit_czero (if_info, czero_code, common, if_info->x);
+ if (!target)
+ {
+ end_sequence ();
+ return false;
+ }
+
+ target = expand_simple_binop (mode, IOR, rtmp, target, if_info->x, 0,
+ OPTAB_WIDEN);
+ if (!target)
+ {
+ end_sequence ();
+ return false;
+ }
+
+ if (target != if_info->x)
+ noce_emit_move_insn (if_info->x, target);
}
+ else
+ {
+ non_zero_op = *to_replace;
+ /* If x is used in both input and out like x = c ? x + z : x,
+ use a new reg to avoid modifying x */
+ if (common && rtx_equal_p (common, if_info->x))
+ target = gen_reg_rtx (mode);
+ else
+ target = if_info->x;
- *to_replace = target;
- noce_emit_move_insn (if_info->x, a);
+ target = noce_emit_czero (if_info, czero_code, non_zero_op, target);
+ if (!target || !to_replace)
+ {
+ end_sequence ();
+ return false;
+ }
+
+ *to_replace = target;
+ noce_emit_move_insn (if_info->x, a);
+ }
seq = end_ifcvt_sequence (if_info);
if (!seq || !targetm.noce_conversion_profitable_p (seq, if_info))
@@ -615,5 +615,164 @@ test_RotateR_eqz (unsigned long x, unsigned long y, unsigned long z,
return x;
}
-/* { dg-final { scan-assembler-times {czero\.eqz} 33 } } */
-/* { dg-final { scan-assembler-times {czero\.nez} 28 } } */
+long
+test_AND_ceqz (long x, long y, long z, long c)
+{
+ if (c)
+ x = y & z;
+ else
+ x = y;
+ return x;
+}
+
+long
+test_AND_ceqz_x (long x, long z, long c)
+{
+ if (c)
+ x = x & z;
+
+ return x;
+}
+
+long
+test_AND_nez (long x, long y, long z, long c)
+{
+ if (c)
+ x = y;
+ else
+ x = y & z;
+ return x;
+}
+
+long
+test_AND_nez_x (long x, long z, long c)
+{
+ if (c)
+ {
+ }
+ else
+ x = x & z;
+ return x;
+}
+
+long
+test_AND_nez_2 (long x, long y, long z, long c)
+{
+ if (!c)
+ x = y & z;
+ else
+ x = y;
+ return x;
+}
+
+long
+test_AND_nez_x_2 (long x, long z, long c)
+{
+ if (!c)
+ x = x & z;
+
+ return x;
+}
+
+long
+test_AND_eqz_2 (long x, long y, long z, long c)
+{
+ if (!c)
+ x = y;
+ else
+ x = y & z;
+ return x;
+}
+
+long
+test_AND_eqz_x_2 (long x, long z, long c)
+{
+ if (!c)
+ {
+ }
+ else
+ x = x & z;
+ return x;
+}
+
+long
+test_AND_ceqz_reverse_bin_oprands (long x, long y, long z, long c)
+{
+ if (c)
+ x = z & y;
+ else
+ x = y;
+ return x;
+}
+
+long
+test_AND_ceqz_x_reverse_bin_oprands (long x, long z, long c)
+{
+ if (c)
+ x = z & x;
+
+ return x;
+}
+
+long
+test_AND_nez_reverse_bin_oprands (long x, long y, long z, long c)
+{
+ if (c)
+ x = y;
+ else
+ x = z & y;
+ return x;
+}
+
+long
+test_AND_nez_x_reverse_bin_oprands (long x, long z, long c)
+{
+ if (c)
+ {
+ }
+ else
+ x = z & x;
+ return x;
+}
+
+long
+test_AND_nez_2_reverse_bin_oprands (long x, long y, long z, long c)
+{
+ if (!c)
+ x = z & y;
+ else
+ x = y;
+ return x;
+}
+
+long
+test_AND_nez_x_2_reverse_bin_oprands (long x, long z, long c)
+{
+ if (!c)
+ x = z & x;
+
+ return x;
+}
+
+long
+test_AND_eqz_2_reverse_bin_oprands (long x, long y, long z, long c)
+{
+ if (!c)
+ x = y;
+ else
+ x = z & y;
+ return x;
+}
+
+long
+test_AND_eqz_x_2_reverse_bin_oprands (long x, long z, long c)
+{
+ if (!c)
+ {
+ }
+ else
+ x = z & x;
+ return x;
+}
+/* { dg-final { scan-assembler-times {czero\.eqz} 41 } } */
+/* { dg-final { scan-assembler-times {czero\.nez} 36 } } */