[V2,1/1,fwprop] : Add the support of forwarding the vec_duplicate rtx

Message ID 20230129141909.37927-1-lehua.ding@rivai.ai
State Accepted
Headers
Series [V2,1/1,fwprop] : Add the support of forwarding the vec_duplicate rtx |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

Lehua Ding Jan. 29, 2023, 2:19 p.m. UTC
  From: Lehua Ding <lehua.ding@rivai.ai>

Hi Richard,

According to your previous comments, I adjusted the code. It will be easier (by
extend `get_inner_reg` function) to support the forwarding of new rtx operators
(e.g. sign-extend) in the future. Please help review this code, thank you so
much.

The current code doesn't take into account the possible increase in register
pressure, I'm going to spend a little more time understanding the
flag_ira_hoist_pressure you mentioned.

For the add of new target hook, I understand that it is still necessary. For
standard operators such as vec_duplicate, it can be processed in fwprop.cc.
However, when I was developing the RISC-V Vector Extension, I hope that the
following insn 9 can be forwarded into insn 10 (when other oeprands of
UNSPEC_RVV are same), so that there is only one instruction insn 11 bellow:

```
(insn 9 5 10 2 (parallel [
            (set (reg/v:VNx4SI 134 [ va ])
                (unspec:VNx4SI [
                        (const_int 0 [0])
                        (vec_duplicate:VNx4SI (subreg/s/u:SI (reg/v:DI 136 [ a ]) 0))
                        (reg/v:DI 138 [ vl ])
                        (const_int 9 [0x9])
                        (reg:SI 66 vl)
                        (reg:SI 67 vtype)
                    ] UNSPEC_RVV))
            (clobber (reg:DI 141))
        ]) "test3.c":5:25 10657 {vmvvnx4si_v_x_internal}
     (nil))
(insn 10 9 14 2 (parallel [
            (set (reg:VNx4SI 135 [ <retval> ])
                (unspec:VNx4SI [
                        (unspec:VNx4SI [
                                (const_int 0 [0])
                                (plus:VNx4SI (reg/v:VNx4SI 134 [ va ])
                                    (reg/v:VNx4SI 137 [ vb ]))
                                (const_int 0 [0])
                            ] UNSPEC_SELECT)
                        (reg/v:DI 138 [ vl ])
                        (const_int 9 [0x9])
                        (reg:SI 66 vl)
                        (reg:SI 67 vtype)
                    ] UNSPEC_RVV))
            (clobber (reg:DI 142))
        ]) "test3.c":6:16 7983 {vaddvnx4si_vv}
     (nil))

(insn 11 9 14 2 (parallel [
            (set (reg:VNx4SI 135 [ <retval> ])
                (unspec:VNx4SI [
                        (unspec:VNx4SI [
                                (const_int 0 [0])
                                (plus:VNx4SI
                                    (vec_duplicate:VNx4SI (subreg/s/u:SI (reg/v:DI 136 [ a ]) 0))
                                    (reg/v:VNx4SI 137 [ vb ]))
                                (const_int 0 [0])
                            ] UNSPEC_SELECT)
                        (reg/v:DI 138 [ vl ])
                        (const_int 9 [0x9])
                        (reg:SI 66 vl)
                        (reg:SI 67 vtype)
                    ] UNSPEC_RVV))
            (clobber (reg:DI 142))
        ]) "test3.c":6:16 7983 {vaddvnx4si_vv}
     (nil))
```

The current very preliminary idea is to return the new src through HOOK `rtx
forward_src (rtx_insn *use_insn, rtx dest, rtx src)`, and return the incoming
src by default (that is, without modification), similar to the following
modification:

```
-  rtx src = SET_SRC (def_set);
+  rtx src = targetm.forward_src (use->insn ()->rtl (), dest, SET_SRC (def_set));
```

Best,
Lehua

gcc/ChangeLog:

	* fwprop.cc (get_inner_reg): New utils function
	(fwprop_propagation::profitable_p): Allow for more
	(src_single_def_p): Add new function for src rtx
	(forward_propagate_into): Change to new function call

---
 gcc/fwprop.cc | 58 ++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 46 insertions(+), 12 deletions(-)
  

Patch

diff --git a/gcc/fwprop.cc b/gcc/fwprop.cc
index ae342f59407..a24d8724028 100644
--- a/gcc/fwprop.cc
+++ b/gcc/fwprop.cc
@@ -317,6 +317,21 @@  fwprop_propagation::folded_to_constants_p () const
   return !(result_flags & UNSIMPLIFIED) && (result_flags & CONSTANT);
 }
 
+/* Return the inner reg if x is wrappered by some specific operation. Like
+   VEC_DUPLICATE. It can be extended to let more specific operation be
+   forwarded. */
+
+rtx
+get_inner_reg (rtx x)
+{
+  switch (GET_CODE (x))
+    {
+    case VEC_DUPLICATE:
+      return XEXP (x, 0);
+    default:
+      return x;
+    }
+}
 
 /* Return true if it is worth keeping the result of the propagation,
    false if it would increase the complexity of the pattern too much.  */
@@ -331,15 +346,17 @@  fwprop_propagation::profitable_p () const
       && (result_flags & PROFITABLE))
     return true;
 
-  if (REG_P (to))
+  rtx new_to = get_inner_reg (to);
+
+  if (REG_P (new_to))
     return true;
 
-  if (GET_CODE (to) == SUBREG
-      && REG_P (SUBREG_REG (to))
-      && !paradoxical_subreg_p (to))
+  if (GET_CODE (new_to) == SUBREG
+      && REG_P (SUBREG_REG (new_to))
+      && !paradoxical_subreg_p (new_to))
     return true;
 
-  if (CONSTANT_P (to))
+  if (CONSTANT_P (new_to))
     return true;
 
   return false;
@@ -353,6 +370,23 @@  reg_single_def_p (rtx x)
   return REG_P (x) && crtl->ssa->single_dominating_def (REGNO (x));
 }
 
+/* Check that X has a single def although wrappered with other code. */
+
+static bool
+src_single_def_p (rtx x)
+{
+  x = get_inner_reg (x);
+
+  if (GET_CODE (x) == SUBREG)
+    {
+      if (!REG_P (SUBREG_REG (x)) || paradoxical_subreg_p (x))
+	return false;
+      x = SUBREG_REG (x);
+    }
+
+  return reg_single_def_p (x);
+}
+
 /* Return true if X contains a paradoxical subreg.  */
 
 static bool
@@ -865,15 +899,15 @@  forward_propagate_into (use_info *use, bool reg_prop_only = false)
   rtx dest = SET_DEST (def_set);
   rtx src = SET_SRC (def_set);
 
-  /* Allow propagations into a loop only for reg-to-reg copies, since
-     replacing one register by another shouldn't increase the cost.
-     Propagations from inner loop to outer loop should also be ok.  */
+  if (reg_prop_only && (!reg_single_def_p (dest) || !reg_single_def_p (src)))
+    return false;
+
+  /* Allow propagations into a loop only for simgple-src-to-reg, maybe increase
+     the cost. Propagations from inner loop to outer loop should also be ok.  */
   struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father;
   struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father;
-  if ((reg_prop_only
-       || (def_loop != use_loop
-	   && !flow_loop_nested_p (use_loop, def_loop)))
-      && (!reg_single_def_p (dest) || !reg_single_def_p (src)))
+  if ((def_loop != use_loop && !flow_loop_nested_p (use_loop, def_loop))
+      && (!reg_single_def_p (dest) || !src_single_def_p (src)))
     return false;
 
   /* Don't substitute into a non-local goto, this confuses CFG.  */