[1/5,V3,ifcvt] optimize x=c ? (y op z) : y by RISC-V Zicond like insns

Message ID 20231205081248.2106-1-gaofei@eswincomputing.com
State Unresolved
Headers
Series [1/5,V3,ifcvt] optimize x=c ? (y op z) : y by RISC-V Zicond like insns |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

Fei Gao Dec. 5, 2023, 8:12 a.m. UTC
  op=[PLUS, MINUS, IOR, XOR]

Conditional op, if zero
rd = (rc == 0) ? (rs1 op rs2) : rs1
-->
czero.nez rd, rs2, rc
op rd, rs1, rd

Conditional op, if non-zero
rd = (rc != 0) ? (rs1 op rs2) : rs1
-->
czero.eqz rd, rs2, rc
op rd, rs1, rd

Co-authored-by: Xiao Zeng<zengxiao@eswincomputing.com>

gcc/ChangeLog:

	* ifcvt.cc (noce_try_cond_zero_arith):handler for condtional zero based ifcvt
        (noce_emit_czero): helper for noce_try_cond_zero_arith
        (noce_cond_zero_binary_op_supported): check supported OPs for condtional zero based ifcvt
        (get_base_reg): get the reg itself or NULL_RTX if not a reg
        (noce_bbs_ok_for_cond_zero_arith): check if BBs are OK for condtional zero based ifcvt
        (noce_process_if_block): add noce_try_cond_zero_arith

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/zicond_ifcvt_opt.c: New test.
---
 gcc/ifcvt.cc                                  | 187 ++++++
 .../gcc.target/riscv/zicond_ifcvt_opt.c       | 566 ++++++++++++++++++
 2 files changed, 753 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zicond_ifcvt_opt.c
  

Comments

Jeff Law Dec. 8, 2023, 12:49 a.m. UTC | #1
On 12/5/23 01:12, Fei Gao wrote:
> op=[PLUS, MINUS, IOR, XOR]
> 
> Conditional op, if zero
> rd = (rc == 0) ? (rs1 op rs2) : rs1
> -->
> czero.nez rd, rs2, rc
> op rd, rs1, rd
> 
> Conditional op, if non-zero
> rd = (rc != 0) ? (rs1 op rs2) : rs1
> -->
> czero.eqz rd, rs2, rc
> op rd, rs1, rd
> 
> Co-authored-by: Xiao Zeng<zengxiao@eswincomputing.com>
> 
> gcc/ChangeLog:
> 
> 	* ifcvt.cc (noce_try_cond_zero_arith):handler for condtional zero based ifcvt
>          (noce_emit_czero): helper for noce_try_cond_zero_arith
>          (noce_cond_zero_binary_op_supported): check supported OPs for condtional zero based ifcvt
>          (get_base_reg): get the reg itself or NULL_RTX if not a reg
>          (noce_bbs_ok_for_cond_zero_arith): check if BBs are OK for condtional zero based ifcvt
>          (noce_process_if_block): add noce_try_cond_zero_arith
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/riscv/zicond_ifcvt_opt.c: New test.
Thanks.  In the future make sure to note testing you've done.  Given 
this is a change to target independent code it must be bootstrapped and 
regression tested on a primary platform.

FTR I've done the bootstrap and regression test on x86_64 in addition to 
regression testing on rv64gc.

I'll push this to the trunk momentarily.

jeff
  
Jeff Law Dec. 11, 2023, 6:15 a.m. UTC | #2
On 12/10/23 21:01, Fei Gao wrote:
> On 2023-12-11 04:43  Jeff Law <jeffreyalaw@gmail.com> wrote:
>>
>>
>>
>> On 12/5/23 01:12, Fei Gao wrote:
>>> op=[ASHIFT, ASHIFTRT, LSHIFTRT, ROTATE, ROTATERT]
>>>
>>> Conditional op, if zero
>>> rd = (rc == 0) ? (rs1 op rs2) : rs1
>>> -->
>>> czero.nez rd, rs2, rc
>>> op rd, rs1, rd
>>>
>>> Conditional op, if non-zero
>>> rd = (rc != 0) ? (rs1 op rs2) : rs1
>>> -->
>>> czero.eqz rd, rs2, rc
>>> op rd, rs1, rd
>>>
>>> Co-authored-by: Xiao Zeng<zengxiao@eswincomputing.com>
>>>
>>> gcc/ChangeLog:
>>>
>>> * ifcvt.cc (noce_cond_zero_binary_op_supported): add support for shift like op.
>>>            (get_base_reg): add support for subreg to handle shift amount operand.
>>>            (noce_bbs_ok_for_cond_zero_arith): to replace shift amount operand.
>>>
>>> gcc/testsuite/ChangeLog:
>>>
>>> * gcc.target/riscv/zicond_ifcvt_opt.c: add TCs for shift like op.
>> So I removed the SUBREG handling code which makes this patch merely an
>> addition of the shift/rotate ops which trivally work just like PLUS,
>> MINUS, IOR, XOR (by conditionally zero-ing the shift count) tested on
>> x86 and pushed it to the trunk.
>>
>> As I noted before while I think handling SUBREGs is important, now is
>> not the time to be adding that support.
> 
> Thanks for your review.
> Got your point to defer support for SUBREGs.
> 
> Shift-like pattern:
> (set (reg/v:DI 137 [ y ])
>          (ashift:DI (reg/v:DI 137 [ y ])
>              (subreg:QI (reg/v:DI 138 [ z ]) 0)))
> 
> No Zicond instructions are generated with the SUBREG handling code removed.
> So I noticed your changes in testcases regarding the number of czero instruction number scanned.
> Then this looks like a NFC patch.
Not on other targets -- not every target forces the shift count into a 
narrow mode.
jeff
  

Patch

diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
index a0af553b9ff..1f0f5414ea1 100644
--- a/gcc/ifcvt.cc
+++ b/gcc/ifcvt.cc
@@ -787,6 +787,7 @@  static rtx noce_get_alt_condition (struct noce_if_info *, rtx, rtx_insn **);
 static bool noce_try_minmax (struct noce_if_info *);
 static bool noce_try_abs (struct noce_if_info *);
 static bool noce_try_sign_mask (struct noce_if_info *);
+static int noce_try_cond_zero_arith (struct noce_if_info *);
 
 /* Return the comparison code for reversed condition for IF_INFO,
    or UNKNOWN if reversing the condition is not possible.  */
@@ -1831,6 +1832,35 @@  noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
     return NULL_RTX;
 }
 
+/*  Emit a conditional zero, returning TARGET or NULL_RTX upon failure.
+    IF_INFO describes the if-conversion scenario under consideration.
+    CZERO_CODE selects the condition (EQ/NE).
+    NON_ZERO_OP is the nonzero operand of the conditional move
+    TARGET is the desired output register.  */
+
+static rtx
+noce_emit_czero (struct noce_if_info *if_info, enum rtx_code czero_code,
+		 rtx non_zero_op, rtx target)
+{
+  machine_mode mode = GET_MODE (target);
+  rtx cond_op0 = XEXP (if_info->cond, 0);
+  rtx czero_cond
+    = gen_rtx_fmt_ee (czero_code, GET_MODE (cond_op0), cond_op0, const0_rtx);
+  rtx if_then_else
+    = gen_rtx_IF_THEN_ELSE (mode, czero_cond, const0_rtx, non_zero_op);
+  rtx set = gen_rtx_SET (target, if_then_else);
+
+  rtx_insn *insn = make_insn_raw (set);
+
+  if (recog_memoized (insn) >= 0)
+    {
+      add_insn (insn);
+      return target;
+    }
+
+  return NULL_RTX;
+}
+
 /* Try only simple constants and registers here.  More complex cases
    are handled in noce_try_cmove_arith after noce_try_store_flag_arith
    has had a go at it.  */
@@ -2880,6 +2910,160 @@  noce_try_sign_mask (struct noce_if_info *if_info)
   return true;
 }
 
+/*  Check if OP is supported by conditional zero based if conversion,
+    returning TRUE if satisfied otherwise FALSE.
+
+    OP is the operation to check.  */
+
+static bool
+noce_cond_zero_binary_op_supported (rtx op)
+{
+  enum rtx_code opcode = GET_CODE (op);
+
+  if (opcode == PLUS || opcode == MINUS || opcode == IOR || opcode == XOR)
+    return true;
+
+  return false;
+}
+
+/*  Helper function to return REG itself,
+    otherwise NULL_RTX for other RTX_CODE.  */
+
+static rtx
+get_base_reg (rtx exp)
+{
+  if (REG_P (exp))
+    return exp;
+
+  return NULL_RTX;
+}
+
+/*  Check if IF-BB and THEN-BB satisfy the condition for conditional zero
+    based if conversion, returning TRUE if satisfied otherwise FALSE.
+
+    IF_INFO describes the if-conversion scenario under consideration.
+    COMMON_PTR points to the common REG of canonicalized IF_INFO->A and
+    IF_INFO->B.
+    CZERO_CODE_PTR points to the comparison code to use in czero RTX.
+    A_PTR points to the A expression of canonicalized IF_INFO->A.
+    TO_REPLACE points to the RTX to be replaced by czero RTX destnation.  */
+
+static bool
+noce_bbs_ok_for_cond_zero_arith (struct noce_if_info *if_info, rtx *common_ptr,
+				 enum rtx_code *czero_code_ptr, rtx *a_ptr,
+				 rtx **to_replace)
+{
+  rtx common = NULL_RTX;
+  rtx cond = if_info->cond;
+  rtx a = copy_rtx (if_info->a);
+  rtx b = copy_rtx (if_info->b);
+  rtx bin_op1 = NULL_RTX;
+  enum rtx_code czero_code = UNKNOWN;
+  bool reverse = false;
+  rtx op0, op1, bin_exp;
+
+  if (!noce_simple_bbs (if_info))
+    return false;
+
+  /* COND must be EQ or NE comparision of a reg and 0.  */
+  if (GET_CODE (cond) != NE && GET_CODE (cond) != EQ)
+    return false;
+  if (!REG_P (XEXP (cond, 0)) || !rtx_equal_p (XEXP (cond, 1), const0_rtx))
+    return false;
+
+  /* Canonicalize x = y : (y op z) to x = (y op z) : y.  */
+  if (REG_P (a) && noce_cond_zero_binary_op_supported (b))
+    {
+      std::swap (a, b);
+      reverse = !reverse;
+    }
+
+  /* Check if x = (y op z) : y is supported by czero based ifcvt.  */
+  if (!(noce_cond_zero_binary_op_supported (a) && REG_P (b)))
+    return false;
+
+  bin_exp = a;
+
+  /* Canonicalize x = (z op y) : y to x = (y op z) : y */
+  op1 = get_base_reg (XEXP (bin_exp, 1));
+  if (op1 && rtx_equal_p (op1, b) && COMMUTATIVE_ARITH_P (bin_exp))
+    std::swap (XEXP (bin_exp, 0), XEXP (bin_exp, 1));
+
+  op0 = get_base_reg (XEXP (bin_exp, 0));
+  if (op0 && rtx_equal_p (op0, b))
+    {
+      common = b;
+      bin_op1 = XEXP (bin_exp, 1);
+      czero_code = reverse
+		     ? noce_reversed_cond_code (if_info)
+		     : GET_CODE (cond);
+    }
+  else
+    return false;
+
+  if (czero_code == UNKNOWN)
+    return false;
+
+  if (REG_P (bin_op1))
+    *to_replace = &XEXP (bin_exp, 1);
+  else
+    return false;
+
+  *common_ptr = common;
+  *czero_code_ptr = czero_code;
+  *a_ptr = a;
+
+  return true;
+}
+
+/*  Try to covert if-then-else with conditional zero,
+    returning TURE on success or FALSE on failure.
+    IF_INFO describes the if-conversion scenario under consideration.  */
+
+static int
+noce_try_cond_zero_arith (struct noce_if_info *if_info)
+{
+  rtx target, a;
+  rtx_insn *seq;
+  machine_mode mode = GET_MODE (if_info->x);
+  rtx common = NULL_RTX;
+  enum rtx_code czero_code = UNKNOWN;
+  rtx non_zero_op = NULL_RTX;
+  rtx *to_replace = NULL;
+
+  if (!noce_bbs_ok_for_cond_zero_arith (if_info, &common, &czero_code, &a,
+					&to_replace))
+    return false;
+
+  non_zero_op = *to_replace;
+
+  start_sequence ();
+
+  /* If x is used in both input and out like x = c ? x + z : x,
+     use a new reg to avoid modifying x  */
+  if (common && rtx_equal_p (common, if_info->x))
+    target = gen_reg_rtx (mode);
+  else
+    target = if_info->x;
+
+  target = noce_emit_czero (if_info, czero_code, non_zero_op, target);
+  if (!target || !to_replace)
+    {
+      end_sequence ();
+      return false;
+    }
+
+  *to_replace = target;
+  noce_emit_move_insn (if_info->x, a);
+
+  seq = end_ifcvt_sequence (if_info);
+  if (!seq || !targetm.noce_conversion_profitable_p (seq, if_info))
+    return false;
+
+  emit_insn_before_setloc (seq, if_info->jump, INSN_LOCATION (if_info->insn_a));
+  if_info->transform_name = "noce_try_cond_zero_arith";
+  return true;
+}
 
 /* Optimize away "if (x & C) x |= C" and similar bit manipulation
    transformations.  */
@@ -3975,6 +4159,9 @@  noce_process_if_block (struct noce_if_info *if_info)
 	goto success;
       if (noce_try_store_flag_mask (if_info))
 	goto success;
+      if (HAVE_conditional_move
+          && noce_try_cond_zero_arith (if_info))
+	goto success;
       if (HAVE_conditional_move
 	  && noce_try_cmove_arith (if_info))
 	goto success;
diff --git a/gcc/testsuite/gcc.target/riscv/zicond_ifcvt_opt.c b/gcc/testsuite/gcc.target/riscv/zicond_ifcvt_opt.c
new file mode 100644
index 00000000000..dcb21c15d1a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zicond_ifcvt_opt.c
@@ -0,0 +1,566 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zbb_zicond -mabi=lp64d -O2 " } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-O1" "-Os" "-Og" "-O3" "-Oz" "-flto"} } */
+
+long
+test_ADD_ceqz (long x, long y, long z, long c)
+{
+  if (c)
+    x = y + z;
+  else
+    x = y;
+  return x;
+}
+
+long
+test_ADD_ceqz_x (long x, long z, long c)
+{
+  if (c)
+    x = x + z;
+
+  return x;
+}
+
+long
+test_ADD_nez (long x, long y, long z, long c)
+{
+  if (c)
+    x = y;
+  else
+    x = y + z;
+  return x;
+}
+
+long
+test_ADD_nez_x (long x, long z, long c)
+{
+  if (c)
+    {
+    }
+  else
+    x = x + z;
+  return x;
+}
+
+long
+test_ADD_nez_2 (long x, long y, long z, long c)
+{
+  if (!c)
+    x = y + z;
+  else
+    x = y;
+  return x;
+}
+
+long
+test_ADD_nez_x_2 (long x, long z, long c)
+{
+  if (!c)
+    x = x + z;
+
+  return x;
+}
+
+long
+test_ADD_eqz_2 (long x, long y, long z, long c)
+{
+  if (!c)
+    x = y;
+  else
+    x = y + z;
+  return x;
+}
+
+long
+test_ADD_eqz_x_2 (long x, long z, long c)
+{
+  if (!c)
+    {
+    }
+  else
+    x = x + z;
+  return x;
+}
+
+long
+test_SUB_ceqz (long x, long y, long z, long c)
+{
+  if (c)
+    x = y - z;
+  else
+    x = y;
+  return x;
+}
+
+long
+test_SUB_ceqz_x (long x, long z, long c)
+{
+  if (c)
+    x = x - z;
+
+  return x;
+}
+
+long
+test_SUB_nez (long x, long y, long z, long c)
+{
+  if (c)
+    x = y;
+  else
+    x = y - z;
+  return x;
+}
+
+long
+test_SUB_nez_x (long x, long z, long c)
+{
+  if (c)
+    {
+    }
+  else
+    x = x - z;
+  return x;
+}
+
+long
+test_SUB_nez_2 (long x, long y, long z, long c)
+{
+  if (!c)
+    x = y - z;
+  else
+    x = y;
+  return x;
+}
+
+long
+test_SUB_nez_x_2 (long x, long z, long c)
+{
+  if (!c)
+    x = x - z;
+
+  return x;
+}
+
+long
+test_SUB_eqz_2 (long x, long y, long z, long c)
+{
+  if (!c)
+    x = y;
+  else
+    x = y - z;
+  return x;
+}
+
+long
+test_SUB_eqz_x_2 (long x, long z, long c)
+{
+  if (!c)
+    {
+    }
+  else
+    x = x - z;
+  return x;
+}
+
+long
+test_IOR_ceqz (long x, long y, long z, long c)
+{
+  if (c)
+    x = y | z;
+  else
+    x = y;
+  return x;
+}
+
+long
+test_IOR_ceqz_x (long x, long z, long c)
+{
+  if (c)
+    x = x | z;
+
+  return x;
+}
+
+long
+test_IOR_nez (long x, long y, long z, long c)
+{
+  if (c)
+    x = y;
+  else
+    x = y | z;
+  return x;
+}
+
+long
+test_IOR_nez_x (long x, long z, long c)
+{
+  if (c)
+    {
+    }
+  else
+    x = x | z;
+  return x;
+}
+
+long
+test_IOR_nez_2 (long x, long y, long z, long c)
+{
+  if (!c)
+    x = y | z;
+  else
+    x = y;
+  return x;
+}
+
+long
+test_IOR_nez_x_2 (long x, long z, long c)
+{
+  if (!c)
+    x = x | z;
+
+  return x;
+}
+
+long
+test_IOR_eqz_2 (long x, long y, long z, long c)
+{
+  if (!c)
+    x = y;
+  else
+    x = y | z;
+  return x;
+}
+
+long
+test_IOR_eqz_x_2 (long x, long z, long c)
+{
+  if (!c)
+    {
+    }
+  else
+    x = x | z;
+  return x;
+}
+
+long
+test_XOR_ceqz (long x, long y, long z, long c)
+{
+  if (c)
+    x = y ^ z;
+  else
+    x = y;
+  return x;
+}
+
+long
+test_XOR_ceqz_x (long x, long z, long c)
+{
+  if (c)
+    x = x ^ z;
+
+  return x;
+}
+
+long
+test_XOR_nez (long x, long y, long z, long c)
+{
+  if (c)
+    x = y;
+  else
+    x = y ^ z;
+  return x;
+}
+
+long
+test_XOR_nez_x (long x, long z, long c)
+{
+  if (c)
+    {
+    }
+  else
+    x = x ^ z;
+  return x;
+}
+
+long
+test_XOR_nez_2 (long x, long y, long z, long c)
+{
+  if (!c)
+    x = y ^ z;
+  else
+    x = y;
+  return x;
+}
+
+long
+test_XOR_nez_x_2 (long x, long z, long c)
+{
+  if (!c)
+    x = x ^ z;
+
+  return x;
+}
+
+long
+test_XOR_eqz_2 (long x, long y, long z, long c)
+{
+  if (!c)
+    x = y;
+  else
+    x = y ^ z;
+  return x;
+}
+
+long
+test_XOR_eqz_x_2 (long x, long z, long c)
+{
+  if (!c)
+    {
+    }
+  else
+    x = x ^ z;
+  return x;
+}
+
+long
+test_ADD_ceqz_reverse_bin_oprands (long x, long y, long z, long c)
+{
+  if (c)
+    x = z + y;
+  else
+    x = y;
+  return x;
+}
+
+long
+test_ADD_ceqz_x_reverse_bin_oprands (long x, long z, long c)
+{
+  if (c)
+    x = z + x;
+
+  return x;
+}
+
+long
+test_ADD_nez_reverse_bin_oprands (long x, long y, long z, long c)
+{
+  if (c)
+    x = y;
+  else
+    x = z + y;
+  return x;
+}
+
+long
+test_ADD_nez_x_reverse_bin_oprands (long x, long z, long c)
+{
+  if (c)
+    {
+    }
+  else
+    x = z + x;
+  return x;
+}
+
+long
+test_ADD_nez_2_reverse_bin_oprands (long x, long y, long z, long c)
+{
+  if (!c)
+    x = z + y;
+  else
+    x = y;
+  return x;
+}
+
+long
+test_ADD_nez_x_2_reverse_bin_oprands (long x, long z, long c)
+{
+  if (!c)
+    x = z + x;
+
+  return x;
+}
+
+long
+test_ADD_eqz_2_reverse_bin_oprands (long x, long y, long z, long c)
+{
+  if (!c)
+    x = y;
+  else
+    x = z + y;
+  return x;
+}
+
+long
+test_ADD_eqz_x_2_reverse_bin_oprands (long x, long z, long c)
+{
+  if (!c)
+    {
+    }
+  else
+    x = z + x;
+  return x;
+}
+
+long
+test_IOR_ceqz_reverse_bin_oprands (long x, long y, long z, long c)
+{
+  if (c)
+    x = z | y;
+  else
+    x = y;
+  return x;
+}
+
+long
+test_IOR_ceqz_x_reverse_bin_oprands (long x, long z, long c)
+{
+  if (c)
+    x = z | x;
+
+  return x;
+}
+
+long
+test_IOR_nez_reverse_bin_oprands (long x, long y, long z, long c)
+{
+  if (c)
+    x = y;
+  else
+    x = z | y;
+  return x;
+}
+
+long
+test_IOR_nez_x_reverse_bin_oprands (long x, long z, long c)
+{
+  if (c)
+    {
+    }
+  else
+    x = z | x;
+  return x;
+}
+
+long
+test_IOR_nez_2_reverse_bin_oprands (long x, long y, long z, long c)
+{
+  if (!c)
+    x = z | y;
+  else
+    x = y;
+  return x;
+}
+
+long
+test_IOR_nez_x_2_reverse_bin_oprands (long x, long z, long c)
+{
+  if (!c)
+    x = z | x;
+
+  return x;
+}
+
+long
+test_IOR_eqz_2_reverse_bin_oprands (long x, long y, long z, long c)
+{
+  if (!c)
+    x = y;
+  else
+    x = z | y;
+  return x;
+}
+
+long
+test_IOR_eqz_x_2_reverse_bin_oprands (long x, long z, long c)
+{
+  if (!c)
+    {
+    }
+  else
+    x = z | x;
+  return x;
+}
+
+long
+test_XOR_ceqz_reverse_bin_oprands (long x, long y, long z, long c)
+{
+  if (c)
+    x = z ^ y;
+  else
+    x = y;
+  return x;
+}
+
+long
+test_XOR_ceqz_x_reverse_bin_oprands (long x, long z, long c)
+{
+  if (c)
+    x = z ^ x;
+
+  return x;
+}
+
+long
+test_XOR_nez_reverse_bin_oprands (long x, long y, long z, long c)
+{
+  if (c)
+    x = y;
+  else
+    x = z ^ y;
+  return x;
+}
+
+long
+test_XOR_nez_x_reverse_bin_oprands (long x, long z, long c)
+{
+  if (c)
+    {
+    }
+  else
+    x = z ^ x;
+  return x;
+}
+
+long
+test_XOR_nez_2_reverse_bin_oprands (long x, long y, long z, long c)
+{
+  if (!c)
+    x = z ^ y;
+  else
+    x = y;
+  return x;
+}
+
+long
+test_XOR_nez_x_2_reverse_bin_oprands (long x, long z, long c)
+{
+  if (!c)
+    x = z ^ x;
+
+  return x;
+}
+
+long
+test_XOR_eqz_2_reverse_bin_oprands (long x, long y, long z, long c)
+{
+  if (!c)
+    x = y;
+  else
+    x = z ^ y;
+  return x;
+}
+
+long
+test_XOR_eqz_x_2_reverse_bin_oprands (long x, long z, long c)
+{
+  if (!c)
+    {
+    }
+  else
+    x = z ^ x;
+  return x;
+}
+
+/* { dg-final { scan-assembler-times {czero\.eqz} 28 } } */
+/* { dg-final { scan-assembler-times {czero\.nez} 28 } } */