[3/5,ifcvt] optimize x=c ? (y AND z) : y by RISC-V Zicond like insns

Message ID 20231205081248.2106-3-gaofei@eswincomputing.com
State Unresolved
Headers
Series [1/5,V3,ifcvt] optimize x=c ? (y op z) : y by RISC-V Zicond like insns |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

Fei Gao Dec. 5, 2023, 8:12 a.m. UTC
  Take the following case for example.

CFLAGS: -march=rv64gc_zbb_zicond -mabi=lp64d -O2

long
test_AND_ceqz (long x, long y, long z, long c)
{
  if (c)
    x = y & z;
  else
    x = y;
  return x;
}

Before patch:

and	a2,a1,a2
czero.eqz	a0,a2,a3
czero.nez	a3,a1,a3
or	a0,a3,a0
ret

After patch:
and	a0,a1,a2
czero.nez	a1,a1,a3
or	a0,a1,a0
ret

Co-authored-by: Xiao Zeng<zengxiao@eswincomputing.com>

gcc/ChangeLog:

	* ifcvt.cc (noce_cond_zero_binary_op_supported): Add support for AND.
        (noce_bbs_ok_for_cond_zero_arith): Likewise.
        (noce_try_cond_zero_arith): Likewise.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/zicond_ifcvt_opt.c: add TCs for AND.
---
 gcc/ifcvt.cc                                  |  69 ++++++--
 .../gcc.target/riscv/zicond_ifcvt_opt.c       | 163 +++++++++++++++++-
 2 files changed, 211 insertions(+), 21 deletions(-)
  

Comments

Jeff Law Dec. 11, 2023, 5:16 a.m. UTC | #1
On 12/5/23 01:12, Fei Gao wrote:
> Take the following case for example.
> 
> CFLAGS: -march=rv64gc_zbb_zicond -mabi=lp64d -O2
> 
> long
> test_AND_ceqz (long x, long y, long z, long c)
> {
>    if (c)
>      x = y & z;
>    else
>      x = y;
>    return x;
> }
> 
> Before patch:
> 
> and	a2,a1,a2
> czero.eqz	a0,a2,a3
> czero.nez	a3,a1,a3
> or	a0,a3,a0
> ret
> 
> After patch:
> and	a0,a1,a2
> czero.nez	a1,a1,a3
> or	a0,a1,a0
FWIW I was having a conversation with Andrew W. a little while ago and 
his preference was to change the IOR into an ADD.  We have a better 
chance of using a compressed variant as c.add allows the full set of 
registers while c.or only allows a subset of registers.  Given one of 
the two input registers in that IOR will always have the value zero, it 
should be equivalent.

Given this is target independent code we have to be careful, but I'm not 
immediately aware of a target where using ADD would be worse than IOR 
here.   Anyway, feel free to submit a patch for that minor change, it'll 
need to queue for gcc-15, but should be non-controversial at that time.

The same trick can be done in the conditional move expander within riscv.cc.




> 
> Co-authored-by: Xiao Zeng<zengxiao@eswincomputing.com>
> 
> gcc/ChangeLog:
> 
> 	* ifcvt.cc (noce_cond_zero_binary_op_supported): Add support for AND.
>          (noce_bbs_ok_for_cond_zero_arith): Likewise.
>          (noce_try_cond_zero_arith): Likewise.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/riscv/zicond_ifcvt_opt.c: add TCs for AND.
This looks fine.  I'll need to adjust the matches in the test file 
because we're not supporting SUBREGs.  I can do that easy enough.

I 3-stage bootstrapped and regression tested this on x86_64 as well as 
regression tested it in rv64gc.

I'll push it to the trunk shortly.

jeff
  

Patch

diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
index 2efae21ebfe..29f33f956eb 100644
--- a/gcc/ifcvt.cc
+++ b/gcc/ifcvt.cc
@@ -2922,7 +2922,7 @@  noce_cond_zero_binary_op_supported (rtx op)
 
   if (opcode == PLUS || opcode == MINUS || opcode == IOR || opcode == XOR
       || opcode == ASHIFT || opcode == ASHIFTRT || opcode == LSHIFTRT
-      || opcode == ROTATE || opcode == ROTATERT)
+      || opcode == ROTATE || opcode == ROTATERT || opcode == AND)
     return true;
 
   return false;
@@ -2954,6 +2954,7 @@  get_base_reg (rtx exp)
 
 static bool
 noce_bbs_ok_for_cond_zero_arith (struct noce_if_info *if_info, rtx *common_ptr,
+				 rtx *bin_exp_ptr,
 				 enum rtx_code *czero_code_ptr, rtx *a_ptr,
 				 rtx **to_replace)
 {
@@ -2998,7 +2999,7 @@  noce_bbs_ok_for_cond_zero_arith (struct noce_if_info *if_info, rtx *common_ptr,
     {
       common = b;
       bin_op1 = XEXP (bin_exp, 1);
-      czero_code = reverse
+      czero_code = (reverse ^ (GET_CODE (bin_exp) == AND))
 		     ? noce_reversed_cond_code (if_info)
 		     : GET_CODE (cond);
     }
@@ -3016,6 +3017,7 @@  noce_bbs_ok_for_cond_zero_arith (struct noce_if_info *if_info, rtx *common_ptr,
     return false;
 
   *common_ptr = common;
+  *bin_exp_ptr = bin_exp;
   *czero_code_ptr = czero_code;
   *a_ptr = a;
 
@@ -3029,38 +3031,67 @@  noce_bbs_ok_for_cond_zero_arith (struct noce_if_info *if_info, rtx *common_ptr,
 static int
 noce_try_cond_zero_arith (struct noce_if_info *if_info)
 {
-  rtx target, a;
+  rtx target, rtmp, a;
   rtx_insn *seq;
   machine_mode mode = GET_MODE (if_info->x);
   rtx common = NULL_RTX;
   enum rtx_code czero_code = UNKNOWN;
+  rtx bin_exp = NULL_RTX;
+  enum rtx_code bin_code = UNKNOWN;
   rtx non_zero_op = NULL_RTX;
   rtx *to_replace = NULL;
 
-  if (!noce_bbs_ok_for_cond_zero_arith (if_info, &common, &czero_code, &a,
-					&to_replace))
+  if (!noce_bbs_ok_for_cond_zero_arith (if_info, &common, &bin_exp, &czero_code,
+					&a, &to_replace))
     return false;
 
-  non_zero_op = *to_replace;
-
   start_sequence ();
 
-  /* If x is used in both input and out like x = c ? x + z : x,
-     use a new reg to avoid modifying x  */
-  if (common && rtx_equal_p (common, if_info->x))
-    target = gen_reg_rtx (mode);
-  else
-    target = if_info->x;
+  bin_code = GET_CODE (bin_exp);
 
-  target = noce_emit_czero (if_info, czero_code, non_zero_op, target);
-  if (!target || !to_replace)
+  if (bin_code == AND)
     {
-      end_sequence ();
-      return false;
+      rtmp = gen_reg_rtx (mode);
+      noce_emit_move_insn (rtmp, a);
+
+      target = noce_emit_czero (if_info, czero_code, common, if_info->x);
+      if (!target)
+	{
+	  end_sequence ();
+	  return false;
+	}
+
+      target = expand_simple_binop (mode, IOR, rtmp, target, if_info->x, 0,
+				    OPTAB_WIDEN);
+      if (!target)
+	{
+	  end_sequence ();
+	  return false;
+	}
+
+      if (target != if_info->x)
+	noce_emit_move_insn (if_info->x, target);
     }
+  else
+    {
+      non_zero_op = *to_replace;
+      /* If x is used in both input and out like x = c ? x + z : x,
+	 use a new reg to avoid modifying x  */
+      if (common && rtx_equal_p (common, if_info->x))
+	target = gen_reg_rtx (mode);
+      else
+	target = if_info->x;
 
-  *to_replace = target;
-  noce_emit_move_insn (if_info->x, a);
+      target = noce_emit_czero (if_info, czero_code, non_zero_op, target);
+      if (!target || !to_replace)
+	{
+	  end_sequence ();
+	  return false;
+	}
+
+      *to_replace = target;
+      noce_emit_move_insn (if_info->x, a);
+    }
 
   seq = end_ifcvt_sequence (if_info);
   if (!seq || !targetm.noce_conversion_profitable_p (seq, if_info))
diff --git a/gcc/testsuite/gcc.target/riscv/zicond_ifcvt_opt.c b/gcc/testsuite/gcc.target/riscv/zicond_ifcvt_opt.c
index ab5a4909b61..d5310690539 100644
--- a/gcc/testsuite/gcc.target/riscv/zicond_ifcvt_opt.c
+++ b/gcc/testsuite/gcc.target/riscv/zicond_ifcvt_opt.c
@@ -615,5 +615,164 @@  test_RotateR_eqz (unsigned long x, unsigned long y, unsigned long z,
   return x;
 }
 
-/* { dg-final { scan-assembler-times {czero\.eqz} 33 } } */
-/* { dg-final { scan-assembler-times {czero\.nez} 28 } } */
+long
+test_AND_ceqz (long x, long y, long z, long c)
+{
+  if (c)
+    x = y & z;
+  else
+    x = y;
+  return x;
+}
+
+long
+test_AND_ceqz_x (long x, long z, long c)
+{
+  if (c)
+    x = x & z;
+
+  return x;
+}
+
+long
+test_AND_nez (long x, long y, long z, long c)
+{
+  if (c)
+    x = y;
+  else
+    x = y & z;
+  return x;
+}
+
+long
+test_AND_nez_x (long x, long z, long c)
+{
+  if (c)
+    {
+    }
+  else
+    x = x & z;
+  return x;
+}
+
+long
+test_AND_nez_2 (long x, long y, long z, long c)
+{
+  if (!c)
+    x = y & z;
+  else
+    x = y;
+  return x;
+}
+
+long
+test_AND_nez_x_2 (long x, long z, long c)
+{
+  if (!c)
+    x = x & z;
+
+  return x;
+}
+
+long
+test_AND_eqz_2 (long x, long y, long z, long c)
+{
+  if (!c)
+    x = y;
+  else
+    x = y & z;
+  return x;
+}
+
+long
+test_AND_eqz_x_2 (long x, long z, long c)
+{
+  if (!c)
+    {
+    }
+  else
+    x = x & z;
+  return x;
+}
+
+long
+test_AND_ceqz_reverse_bin_oprands (long x, long y, long z, long c)
+{
+  if (c)
+    x = z & y;
+  else
+    x = y;
+  return x;
+}
+
+long
+test_AND_ceqz_x_reverse_bin_oprands (long x, long z, long c)
+{
+  if (c)
+    x = z & x;
+
+  return x;
+}
+
+long
+test_AND_nez_reverse_bin_oprands (long x, long y, long z, long c)
+{
+  if (c)
+    x = y;
+  else
+    x = z & y;
+  return x;
+}
+
+long
+test_AND_nez_x_reverse_bin_oprands (long x, long z, long c)
+{
+  if (c)
+    {
+    }
+  else
+    x = z & x;
+  return x;
+}
+
+long
+test_AND_nez_2_reverse_bin_oprands (long x, long y, long z, long c)
+{
+  if (!c)
+    x = z & y;
+  else
+    x = y;
+  return x;
+}
+
+long
+test_AND_nez_x_2_reverse_bin_oprands (long x, long z, long c)
+{
+  if (!c)
+    x = z & x;
+
+  return x;
+}
+
+long
+test_AND_eqz_2_reverse_bin_oprands (long x, long y, long z, long c)
+{
+  if (!c)
+    x = y;
+  else
+    x = z & y;
+  return x;
+}
+
+long
+test_AND_eqz_x_2_reverse_bin_oprands (long x, long z, long c)
+{
+  if (!c)
+    {
+    }
+  else
+    x = z & x;
+  return x;
+}
+/* { dg-final { scan-assembler-times {czero\.eqz} 41 } } */
+/* { dg-final { scan-assembler-times {czero\.nez} 36 } } */