[V2,1/1,fwprop] : Add the support of forwarding the vec_duplicate rtx
Checks
Commit Message
From: Lehua Ding <lehua.ding@rivai.ai>
Hi Richard,
According to your previous comments, I adjusted the code. It will be easier (by
extend `get_inner_reg` function) to support the forwarding of new rtx operators
(e.g. sign-extend) in the future. Please help review this code, thank you so
much.
The current code doesn't take into account the possible increase in register
pressure, I'm going to spend a little more time understanding the
flag_ira_hoist_pressure you mentioned.
For the add of new target hook, I understand that it is still necessary. For
standard operators such as vec_duplicate, it can be processed in fwprop.cc.
However, when I was developing the RISC-V Vector Extension, I hope that the
following insn 9 can be forwarded into insn 10 (when other oeprands of
UNSPEC_RVV are same), so that there is only one instruction insn 11 bellow:
```
(insn 9 5 10 2 (parallel [
(set (reg/v:VNx4SI 134 [ va ])
(unspec:VNx4SI [
(const_int 0 [0])
(vec_duplicate:VNx4SI (subreg/s/u:SI (reg/v:DI 136 [ a ]) 0))
(reg/v:DI 138 [ vl ])
(const_int 9 [0x9])
(reg:SI 66 vl)
(reg:SI 67 vtype)
] UNSPEC_RVV))
(clobber (reg:DI 141))
]) "test3.c":5:25 10657 {vmvvnx4si_v_x_internal}
(nil))
(insn 10 9 14 2 (parallel [
(set (reg:VNx4SI 135 [ <retval> ])
(unspec:VNx4SI [
(unspec:VNx4SI [
(const_int 0 [0])
(plus:VNx4SI (reg/v:VNx4SI 134 [ va ])
(reg/v:VNx4SI 137 [ vb ]))
(const_int 0 [0])
] UNSPEC_SELECT)
(reg/v:DI 138 [ vl ])
(const_int 9 [0x9])
(reg:SI 66 vl)
(reg:SI 67 vtype)
] UNSPEC_RVV))
(clobber (reg:DI 142))
]) "test3.c":6:16 7983 {vaddvnx4si_vv}
(nil))
(insn 11 9 14 2 (parallel [
(set (reg:VNx4SI 135 [ <retval> ])
(unspec:VNx4SI [
(unspec:VNx4SI [
(const_int 0 [0])
(plus:VNx4SI
(vec_duplicate:VNx4SI (subreg/s/u:SI (reg/v:DI 136 [ a ]) 0))
(reg/v:VNx4SI 137 [ vb ]))
(const_int 0 [0])
] UNSPEC_SELECT)
(reg/v:DI 138 [ vl ])
(const_int 9 [0x9])
(reg:SI 66 vl)
(reg:SI 67 vtype)
] UNSPEC_RVV))
(clobber (reg:DI 142))
]) "test3.c":6:16 7983 {vaddvnx4si_vv}
(nil))
```
The current very preliminary idea is to return the new src through HOOK `rtx
forward_src (rtx_insn *use_insn, rtx dest, rtx src)`, and return the incoming
src by default (that is, without modification), similar to the following
modification:
```
- rtx src = SET_SRC (def_set);
+ rtx src = targetm.forward_src (use->insn ()->rtl (), dest, SET_SRC (def_set));
```
Best,
Lehua
gcc/ChangeLog:
* fwprop.cc (get_inner_reg): New utils function
(fwprop_propagation::profitable_p): Allow for more
(src_single_def_p): Add new function for src rtx
(forward_propagate_into): Change to new function call
---
gcc/fwprop.cc | 58 ++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 46 insertions(+), 12 deletions(-)
@@ -317,6 +317,21 @@ fwprop_propagation::folded_to_constants_p () const
return !(result_flags & UNSIMPLIFIED) && (result_flags & CONSTANT);
}
+/* Return the inner reg if x is wrappered by some specific operation. Like
+ VEC_DUPLICATE. It can be extended to let more specific operation be
+ forwarded. */
+
+rtx
+get_inner_reg (rtx x)
+{
+ switch (GET_CODE (x))
+ {
+ case VEC_DUPLICATE:
+ return XEXP (x, 0);
+ default:
+ return x;
+ }
+}
/* Return true if it is worth keeping the result of the propagation,
false if it would increase the complexity of the pattern too much. */
@@ -331,15 +346,17 @@ fwprop_propagation::profitable_p () const
&& (result_flags & PROFITABLE))
return true;
- if (REG_P (to))
+ rtx new_to = get_inner_reg (to);
+
+ if (REG_P (new_to))
return true;
- if (GET_CODE (to) == SUBREG
- && REG_P (SUBREG_REG (to))
- && !paradoxical_subreg_p (to))
+ if (GET_CODE (new_to) == SUBREG
+ && REG_P (SUBREG_REG (new_to))
+ && !paradoxical_subreg_p (new_to))
return true;
- if (CONSTANT_P (to))
+ if (CONSTANT_P (new_to))
return true;
return false;
@@ -353,6 +370,23 @@ reg_single_def_p (rtx x)
return REG_P (x) && crtl->ssa->single_dominating_def (REGNO (x));
}
+/* Check that X has a single def although wrappered with other code. */
+
+static bool
+src_single_def_p (rtx x)
+{
+ x = get_inner_reg (x);
+
+ if (GET_CODE (x) == SUBREG)
+ {
+ if (!REG_P (SUBREG_REG (x)) || paradoxical_subreg_p (x))
+ return false;
+ x = SUBREG_REG (x);
+ }
+
+ return reg_single_def_p (x);
+}
+
/* Return true if X contains a paradoxical subreg. */
static bool
@@ -865,15 +899,15 @@ forward_propagate_into (use_info *use, bool reg_prop_only = false)
rtx dest = SET_DEST (def_set);
rtx src = SET_SRC (def_set);
- /* Allow propagations into a loop only for reg-to-reg copies, since
- replacing one register by another shouldn't increase the cost.
- Propagations from inner loop to outer loop should also be ok. */
+ if (reg_prop_only && (!reg_single_def_p (dest) || !reg_single_def_p (src)))
+ return false;
+
+ /* Allow propagations into a loop only for simgple-src-to-reg, maybe increase
+ the cost. Propagations from inner loop to outer loop should also be ok. */
struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father;
struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father;
- if ((reg_prop_only
- || (def_loop != use_loop
- && !flow_loop_nested_p (use_loop, def_loop)))
- && (!reg_single_def_p (dest) || !reg_single_def_p (src)))
+ if ((def_loop != use_loop && !flow_loop_nested_p (use_loop, def_loop))
+ && (!reg_single_def_p (dest) || !src_single_def_p (src)))
return false;
/* Don't substitute into a non-local goto, this confuses CFG. */