[V13] VECT: Add decrement IV iteration loop control by variable amount support
Checks
Commit Message
From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
This patch adds support for a decrementing IV, following the flow designed by Richard:
(1) In vect_set_loop_condition_partial_vectors, for the first iteration of
the loop over the rgroup controls, call vect_set_loop_controls_directly.
(2) vect_set_loop_controls_directly calculates "step" as in your patch.
If rgc has 1 control, this step is the SSA name created for that control.
Otherwise the step is a fresh SSA name, as in your patch.
(3) vect_set_loop_controls_directly stores this step somewhere for later
use, probably in LOOP_VINFO. Let's use "S" to refer to this stored step.
(4) After the vect_set_loop_controls_directly call above, and outside
the "if" statement that now contains vect_set_loop_controls_directly,
check whether rgc->controls.length () > 1. If so, use
vect_adjust_loop_lens_control to set the controls based on S.
Then the only caller of vect_adjust_loop_lens_control is
vect_set_loop_condition_partial_vectors. And the starting
step for vect_adjust_loop_lens_control is always S.
This patch has been well tested for single-rgroup and multiple-rgroup (SLP)
cases and passes all testcases in the RISC-V port.
It also passes tests for multiple-rgroup (non-SLP) cases, tested on vec_pack_trunc.
gcc/ChangeLog:
* tree-vect-loop-manip.cc (vect_set_loop_controls_directly): Add decrement IV support.
(vect_adjust_loop_lens_control): Ditto.
(vect_set_loop_condition_partial_vectors): Ditto.
* tree-vect-loop.cc (_loop_vec_info::_loop_vec_info): New variable.
* tree-vectorizer.h (LOOP_VINFO_USING_DECREMENTING_IV_P): New macro.
(LOOP_VINFO_DECREMENTING_IV_STEP): New macro.
---
gcc/tree-vect-loop-manip.cc | 179 +++++++++++++++++++++++++++++++++---
gcc/tree-vect-loop.cc | 13 +++
gcc/tree-vectorizer.h | 12 +++
3 files changed, 193 insertions(+), 11 deletions(-)
Comments
Please disregard V13 and review V14 directly.
https://gcc.gnu.org/pipermail/gcc-patches/2023-May/619478.html
Thanks.
juzhe.zhong@rivai.ai
From: juzhe.zhong
Date: 2023-05-24 22:29
To: gcc-patches
CC: richard.sandiford; rguenther; Ju-Zhe Zhong
Subject: [PATCH V13] VECT: Add decrement IV iteration loop control by variable amount support
From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
This patch adds support for a decrementing IV, following the flow designed by Richard:
(1) In vect_set_loop_condition_partial_vectors, for the first iteration of
the loop over the rgroup controls, call vect_set_loop_controls_directly.
(2) vect_set_loop_controls_directly calculates "step" as in your patch.
If rgc has 1 control, this step is the SSA name created for that control.
Otherwise the step is a fresh SSA name, as in your patch.
(3) vect_set_loop_controls_directly stores this step somewhere for later
use, probably in LOOP_VINFO. Let's use "S" to refer to this stored step.
(4) After the vect_set_loop_controls_directly call above, and outside
the "if" statement that now contains vect_set_loop_controls_directly,
check whether rgc->controls.length () > 1. If so, use
vect_adjust_loop_lens_control to set the controls based on S.
Then the only caller of vect_adjust_loop_lens_control is
vect_set_loop_condition_partial_vectors. And the starting
step for vect_adjust_loop_lens_control is always S.
This patch has been well tested for single-rgroup and multiple-rgroup (SLP)
cases and passes all testcases in the RISC-V port.
It also passes tests for multiple-rgroup (non-SLP) cases, tested on vec_pack_trunc.
gcc/ChangeLog:
* tree-vect-loop-manip.cc (vect_set_loop_controls_directly): Add decrement IV support.
(vect_adjust_loop_lens_control): Ditto.
(vect_set_loop_condition_partial_vectors): Ditto.
* tree-vect-loop.cc (_loop_vec_info::_loop_vec_info): New variable.
* tree-vectorizer.h (LOOP_VINFO_USING_DECREMENTING_IV_P): New macro.
(LOOP_VINFO_DECREMENTING_IV_STEP): New macro.
---
gcc/tree-vect-loop-manip.cc | 179 +++++++++++++++++++++++++++++++++---
gcc/tree-vect-loop.cc | 13 +++
gcc/tree-vectorizer.h | 12 +++
3 files changed, 193 insertions(+), 11 deletions(-)
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index ff6159e08d5..3a872668f89 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -468,6 +468,38 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
gimple_stmt_iterator incr_gsi;
bool insert_after;
standard_iv_increment_position (loop, &incr_gsi, &insert_after);
+ if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
+ {
+ /* single rgroup:
+ ...
+ _10 = (unsigned long) count_12(D);
+ ...
+ # ivtmp_9 = PHI <ivtmp_35(6), _10(5)>
+ _36 = MIN_EXPR <ivtmp_9, POLY_INT_CST [4, 4]>;
+ ...
+ vect__4.8_28 = .LEN_LOAD (_17, 32B, _36, 0);
+ ...
+ ivtmp_35 = ivtmp_9 - _36;
+ ...
+ if (ivtmp_35 != 0)
+ goto <bb 4>; [83.33%]
+ else
+ goto <bb 5>; [16.67%]
+ */
+ nitems_total = gimple_convert (preheader_seq, iv_type, nitems_total);
+ tree step = rgc->controls.length () == 1 ? rgc->controls[0]
+ : make_ssa_name (iv_type);
+ /* Create decrement IV. */
+ create_iv (nitems_total, MINUS_EXPR, step, NULL_TREE, loop, &incr_gsi,
+ insert_after, &index_before_incr, &index_after_incr);
+ gimple_seq_add_stmt (header_seq, gimple_build_assign (step, MIN_EXPR,
+ index_before_incr,
+ nitems_step));
+ LOOP_VINFO_DECREMENTING_IV_STEP (loop_vinfo) = step;
+ return index_after_incr;
+ }
+
+ /* Create increment IV. */
create_iv (build_int_cst (iv_type, 0), PLUS_EXPR, nitems_step, NULL_TREE,
loop, &incr_gsi, insert_after, &index_before_incr,
&index_after_incr);
@@ -683,6 +715,63 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
return next_ctrl;
}
+/* Set the length controls of the multiple-control rgroup DEST_RGM from STEP.
+
+ STEP is the starting amount to distribute across the controls; the caller
+ visible in this patch passes LOOP_VINFO_DECREMENTING_IV_STEP. IV_TYPE is
+ the type of the length-limit constant and of the intermediate remainders.
+ The generated statements are appended to SEQ.
+
+ In the example below, VF/N stands for the per-control limit, which the
+ code computes as TYPE_VECTOR_SUBPARTS (DEST_RGM->type) * DEST_RGM->factor.
+
+ _36 = MIN_EXPR <ivtmp_34, VF>;
+
+ First length (MIN (X, VF/N)):
+ loop_len_15 = MIN_EXPR <_36, VF/N>;
+
+ Second length:
+ tmp = _36 - loop_len_15;
+ loop_len_16 = MIN (tmp, VF/N);
+
+ Third length:
+ tmp2 = tmp - loop_len_16;
+ loop_len_17 = MIN (tmp2, VF/N);
+
+ Last length:
+ loop_len_18 = tmp2 - loop_len_17;
+*/
+
+static void
+vect_adjust_loop_lens_control (tree iv_type, gimple_seq *seq,
+ rgroup_controls *dest_rgm, tree step)
+{
+ tree ctrl_type = dest_rgm->type;
+ poly_uint64 nitems_per_ctrl
+ = TYPE_VECTOR_SUBPARTS (ctrl_type) * dest_rgm->factor;
+ tree length_limit = build_int_cst (iv_type, nitems_per_ctrl);
+
+ for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
+ {
+ tree ctrl = dest_rgm->controls[i];
+ if (i == 0)
+ {
+ /* First control: MIN (STEP, LENGTH_LIMIT). This branch is tested
+ first, so it is also the one taken when there is only a single
+ control. */
+ gassign *assign
+ = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit);
+ gimple_seq_add_stmt (seq, assign);
+ }
+ else if (i == dest_rgm->controls.length () - 1)
+ {
+ /* Last control: whatever remains after the previous control; no
+ MIN_EXPR is emitted here. NOTE(review): this relies on the
+ remainder already being within [0, VF/N] -- confirm. */
+ gassign *assign = gimple_build_assign (ctrl, MINUS_EXPR, step,
+ dest_rgm->controls[i - 1]);
+ gimple_seq_add_stmt (seq, assign);
+ }
+ else
+ {
+ /* Middle controls: subtract the previous control from the running
+ remainder (kept in STEP), then cap it at LENGTH_LIMIT. */
+ step = gimple_build (seq, MINUS_EXPR, iv_type, step,
+ dest_rgm->controls[i - 1]);
+ gassign *assign
+ = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit);
+ gimple_seq_add_stmt (seq, assign);
+ }
+ }
+}
+
/* Set up the iteration condition and rgroup controls for LOOP, given
that LOOP_VINFO_USING_PARTIAL_VECTORS_P is true for the vectorized
loop. LOOP_VINFO describes the vectorization of LOOP. NITERS is
@@ -753,17 +842,85 @@ vect_set_loop_condition_partial_vectors (class loop *loop,
continue;
}
- /* See whether zero-based IV would ever generate all-false masks
- or zero length before wrapping around. */
- bool might_wrap_p = vect_rgroup_iv_might_wrap_p (loop_vinfo, rgc);
-
- /* Set up all controls for this group. */
- test_ctrl = vect_set_loop_controls_directly (loop, loop_vinfo,
- &preheader_seq,
- &header_seq,
- loop_cond_gsi, rgc,
- niters, niters_skip,
- might_wrap_p);
+ if (!LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
+ || !LOOP_VINFO_DECREMENTING_IV_STEP (loop_vinfo))
+ {
+ /* See whether zero-based IV would ever generate all-false masks
+ or zero length before wrapping around. */
+ bool might_wrap_p = vect_rgroup_iv_might_wrap_p (loop_vinfo, rgc);
+
+ /* Set up all controls for this group. */
+ test_ctrl
+ = vect_set_loop_controls_directly (loop, loop_vinfo,
+ &preheader_seq, &header_seq,
+ loop_cond_gsi, rgc, niters,
+ niters_skip, might_wrap_p);
+ }
+
+ /* Decrement IV only run vect_set_loop_controls_directly once. */
+ if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
+ && rgc->controls.length () > 1)
+ {
+ /*
+ - Multiple rgroup (SLP):
+ ...
+ _38 = (unsigned long) bnd.7_29;
+ _39 = _38 * 2;
+ ...
+ # ivtmp_41 = PHI <ivtmp_42(6), _39(5)>
+ ...
+ _43 = MIN_EXPR <ivtmp_41, 32>;
+ loop_len_26 = MIN_EXPR <_43, 16>;
+ loop_len_25 = _43 - loop_len_26;
+ ...
+ .LEN_STORE (_6, 8B, loop_len_26, ...);
+ ...
+ .LEN_STORE (_25, 8B, loop_len_25, ...);
+ _33 = loop_len_26 / 2;
+ ...
+ .LEN_STORE (_8, 16B, _33, ...);
+ _36 = loop_len_25 / 2;
+ ...
+ .LEN_STORE (_15, 16B, _36, ...);
+ ivtmp_42 = ivtmp_41 - _43;
+ ...
+
+ - Multiple rgroup (non-SLP):
+ ...
+ _38 = (unsigned long) n_12(D);
+ ...
+ # ivtmp_38 = PHI <ivtmp_39(3), 100(2)>
+ ...
+ _40 = MIN_EXPR <ivtmp_38, POLY_INT_CST [8, 8]>;
+ loop_len_21 = MIN_EXPR <_40, POLY_INT_CST [2, 2]>;
+ _41 = _40 - loop_len_21;
+ loop_len_20 = MIN_EXPR <_41, POLY_INT_CST [2, 2]>;
+ _42 = _40 - loop_len_20;
+ loop_len_19 = MIN_EXPR <_42, POLY_INT_CST [2, 2]>;
+ _43 = _40 - loop_len_19;
+ loop_len_16 = MIN_EXPR <_43, POLY_INT_CST [2, 2]>;
+ ...
+ vect__4.8_15 = .LEN_LOAD (_6, 64B, loop_len_21, 0);
+ ...
+ vect__4.9_8 = .LEN_LOAD (_13, 64B, loop_len_20, 0);
+ ...
+ vect__4.10_28 = .LEN_LOAD (_46, 64B, loop_len_19, 0);
+ ...
+ vect__4.11_30 = .LEN_LOAD (_49, 64B, loop_len_16, 0);
+ vect__7.13_31 = VEC_PACK_TRUNC_EXPR <...>,
+ vect__7.13_32 = VEC_PACK_TRUNC_EXPR <...>;
+ vect__7.12_33 = VEC_PACK_TRUNC_EXPR <...>;
+ ...
+ .LEN_STORE (_14, 16B, _40, vect__7.12_33, 0);
+ ivtmp_39 = ivtmp_38 - _40;
+ ...
+ */
+ tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
+ tree step = LOOP_VINFO_DECREMENTING_IV_STEP (loop_vinfo);
+ gcc_assert (step);
+ vect_adjust_loop_lens_control (iv_type, &header_seq, rgc, step);
+ break;
+ }
}
/* Emit all accumulated statements. */
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index cf10132b0bf..456f50fa7cc 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -973,6 +973,8 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
vectorizable (false),
can_use_partial_vectors_p (param_vect_partial_vector_usage != 0),
using_partial_vectors_p (false),
+ using_decrementing_iv_p (false),
+ decrementing_iv_step (NULL_TREE),
epil_using_partial_vectors_p (false),
partial_load_store_bias (0),
peeling_for_gaps (false),
@@ -2725,6 +2727,17 @@ start_over:
&& !vect_verify_loop_lens (loop_vinfo))
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
+ /* If we're vectorizing a loop that uses length "controls" and
+ can iterate more than once, we apply the decrementing IV approach
+ in loop control. */
+ if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+ && !LOOP_VINFO_LENS (loop_vinfo).is_empty ()
+ && LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo) == 0
+ && !(LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ && known_le (LOOP_VINFO_INT_NITERS (loop_vinfo),
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo))))
+ LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo) = true;
+
/* If we're vectorizing an epilogue loop, the vectorized loop either needs
to be able to handle fewer than VF scalars, or needs to have a lower VF
than the main loop. */
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 02d2ad6fba1..7ed079f543a 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -818,6 +818,16 @@ public:
the vector loop can handle fewer than VF scalars. */
bool using_partial_vectors_p;
+ /* True if we've decided to use a decrementing loop control IV that counts
+ scalars. This can be done for any loop that:
+
+ (a) uses length "controls"; and
+ (b) can iterate more than once. */
+ bool using_decrementing_iv_p;
+
+ /* The variable amount step for decrement IV. */
+ tree decrementing_iv_step;
+
/* True if we've decided to use partially-populated vectors for the
epilogue of loop. */
bool epil_using_partial_vectors_p;
@@ -890,6 +900,8 @@ public:
#define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable
#define LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P(L) (L)->can_use_partial_vectors_p
#define LOOP_VINFO_USING_PARTIAL_VECTORS_P(L) (L)->using_partial_vectors_p
+#define LOOP_VINFO_USING_DECREMENTING_IV_P(L) (L)->using_decrementing_iv_p
+#define LOOP_VINFO_DECREMENTING_IV_STEP(L) (L)->decrementing_iv_step
#define LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P(L) \
(L)->epil_using_partial_vectors_p
#define LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS(L) (L)->partial_load_store_bias
--
2.36.1
@@ -468,6 +468,38 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
gimple_stmt_iterator incr_gsi;
bool insert_after;
standard_iv_increment_position (loop, &incr_gsi, &insert_after);
+ if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
+ {
+ /* single rgroup:
+ ...
+ _10 = (unsigned long) count_12(D);
+ ...
+ # ivtmp_9 = PHI <ivtmp_35(6), _10(5)>
+ _36 = MIN_EXPR <ivtmp_9, POLY_INT_CST [4, 4]>;
+ ...
+ vect__4.8_28 = .LEN_LOAD (_17, 32B, _36, 0);
+ ...
+ ivtmp_35 = ivtmp_9 - _36;
+ ...
+ if (ivtmp_35 != 0)
+ goto <bb 4>; [83.33%]
+ else
+ goto <bb 5>; [16.67%]
+ */
+ nitems_total = gimple_convert (preheader_seq, iv_type, nitems_total);
+ tree step = rgc->controls.length () == 1 ? rgc->controls[0]
+ : make_ssa_name (iv_type);
+ /* Create decrement IV. */
+ create_iv (nitems_total, MINUS_EXPR, step, NULL_TREE, loop, &incr_gsi,
+ insert_after, &index_before_incr, &index_after_incr);
+ gimple_seq_add_stmt (header_seq, gimple_build_assign (step, MIN_EXPR,
+ index_before_incr,
+ nitems_step));
+ LOOP_VINFO_DECREMENTING_IV_STEP (loop_vinfo) = step;
+ return index_after_incr;
+ }
+
+ /* Create increment IV. */
create_iv (build_int_cst (iv_type, 0), PLUS_EXPR, nitems_step, NULL_TREE,
loop, &incr_gsi, insert_after, &index_before_incr,
&index_after_incr);
@@ -683,6 +715,63 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
return next_ctrl;
}
+/* Set the length controls of the multiple-control rgroup DEST_RGM from STEP.
+
+ STEP is the starting amount to distribute across the controls; the caller
+ visible in this patch passes LOOP_VINFO_DECREMENTING_IV_STEP. IV_TYPE is
+ the type of the length-limit constant and of the intermediate remainders.
+ The generated statements are appended to SEQ.
+
+ In the example below, VF/N stands for the per-control limit, which the
+ code computes as TYPE_VECTOR_SUBPARTS (DEST_RGM->type) * DEST_RGM->factor.
+
+ _36 = MIN_EXPR <ivtmp_34, VF>;
+
+ First length (MIN (X, VF/N)):
+ loop_len_15 = MIN_EXPR <_36, VF/N>;
+
+ Second length:
+ tmp = _36 - loop_len_15;
+ loop_len_16 = MIN (tmp, VF/N);
+
+ Third length:
+ tmp2 = tmp - loop_len_16;
+ loop_len_17 = MIN (tmp2, VF/N);
+
+ Last length:
+ loop_len_18 = tmp2 - loop_len_17;
+*/
+
+static void
+vect_adjust_loop_lens_control (tree iv_type, gimple_seq *seq,
+ rgroup_controls *dest_rgm, tree step)
+{
+ tree ctrl_type = dest_rgm->type;
+ poly_uint64 nitems_per_ctrl
+ = TYPE_VECTOR_SUBPARTS (ctrl_type) * dest_rgm->factor;
+ tree length_limit = build_int_cst (iv_type, nitems_per_ctrl);
+
+ for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
+ {
+ tree ctrl = dest_rgm->controls[i];
+ if (i == 0)
+ {
+ /* First control: MIN (STEP, LENGTH_LIMIT). This branch is tested
+ first, so it is also the one taken when there is only a single
+ control. */
+ gassign *assign
+ = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit);
+ gimple_seq_add_stmt (seq, assign);
+ }
+ else if (i == dest_rgm->controls.length () - 1)
+ {
+ /* Last control: whatever remains after the previous control; no
+ MIN_EXPR is emitted here. NOTE(review): this relies on the
+ remainder already being within [0, VF/N] -- confirm. */
+ gassign *assign = gimple_build_assign (ctrl, MINUS_EXPR, step,
+ dest_rgm->controls[i - 1]);
+ gimple_seq_add_stmt (seq, assign);
+ }
+ else
+ {
+ /* Middle controls: subtract the previous control from the running
+ remainder (kept in STEP), then cap it at LENGTH_LIMIT. */
+ step = gimple_build (seq, MINUS_EXPR, iv_type, step,
+ dest_rgm->controls[i - 1]);
+ gassign *assign
+ = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit);
+ gimple_seq_add_stmt (seq, assign);
+ }
+ }
+}
+
/* Set up the iteration condition and rgroup controls for LOOP, given
that LOOP_VINFO_USING_PARTIAL_VECTORS_P is true for the vectorized
loop. LOOP_VINFO describes the vectorization of LOOP. NITERS is
@@ -753,17 +842,85 @@ vect_set_loop_condition_partial_vectors (class loop *loop,
continue;
}
- /* See whether zero-based IV would ever generate all-false masks
- or zero length before wrapping around. */
- bool might_wrap_p = vect_rgroup_iv_might_wrap_p (loop_vinfo, rgc);
-
- /* Set up all controls for this group. */
- test_ctrl = vect_set_loop_controls_directly (loop, loop_vinfo,
- &preheader_seq,
- &header_seq,
- loop_cond_gsi, rgc,
- niters, niters_skip,
- might_wrap_p);
+ if (!LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
+ || !LOOP_VINFO_DECREMENTING_IV_STEP (loop_vinfo))
+ {
+ /* See whether zero-based IV would ever generate all-false masks
+ or zero length before wrapping around. */
+ bool might_wrap_p = vect_rgroup_iv_might_wrap_p (loop_vinfo, rgc);
+
+ /* Set up all controls for this group. */
+ test_ctrl
+ = vect_set_loop_controls_directly (loop, loop_vinfo,
+ &preheader_seq, &header_seq,
+ loop_cond_gsi, rgc, niters,
+ niters_skip, might_wrap_p);
+ }
+
+ /* Decrement IV only run vect_set_loop_controls_directly once. */
+ if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
+ && rgc->controls.length () > 1)
+ {
+ /*
+ - Multiple rgroup (SLP):
+ ...
+ _38 = (unsigned long) bnd.7_29;
+ _39 = _38 * 2;
+ ...
+ # ivtmp_41 = PHI <ivtmp_42(6), _39(5)>
+ ...
+ _43 = MIN_EXPR <ivtmp_41, 32>;
+ loop_len_26 = MIN_EXPR <_43, 16>;
+ loop_len_25 = _43 - loop_len_26;
+ ...
+ .LEN_STORE (_6, 8B, loop_len_26, ...);
+ ...
+ .LEN_STORE (_25, 8B, loop_len_25, ...);
+ _33 = loop_len_26 / 2;
+ ...
+ .LEN_STORE (_8, 16B, _33, ...);
+ _36 = loop_len_25 / 2;
+ ...
+ .LEN_STORE (_15, 16B, _36, ...);
+ ivtmp_42 = ivtmp_41 - _43;
+ ...
+
+ - Multiple rgroup (non-SLP):
+ ...
+ _38 = (unsigned long) n_12(D);
+ ...
+ # ivtmp_38 = PHI <ivtmp_39(3), 100(2)>
+ ...
+ _40 = MIN_EXPR <ivtmp_38, POLY_INT_CST [8, 8]>;
+ loop_len_21 = MIN_EXPR <_40, POLY_INT_CST [2, 2]>;
+ _41 = _40 - loop_len_21;
+ loop_len_20 = MIN_EXPR <_41, POLY_INT_CST [2, 2]>;
+ _42 = _40 - loop_len_20;
+ loop_len_19 = MIN_EXPR <_42, POLY_INT_CST [2, 2]>;
+ _43 = _40 - loop_len_19;
+ loop_len_16 = MIN_EXPR <_43, POLY_INT_CST [2, 2]>;
+ ...
+ vect__4.8_15 = .LEN_LOAD (_6, 64B, loop_len_21, 0);
+ ...
+ vect__4.9_8 = .LEN_LOAD (_13, 64B, loop_len_20, 0);
+ ...
+ vect__4.10_28 = .LEN_LOAD (_46, 64B, loop_len_19, 0);
+ ...
+ vect__4.11_30 = .LEN_LOAD (_49, 64B, loop_len_16, 0);
+ vect__7.13_31 = VEC_PACK_TRUNC_EXPR <...>,
+ vect__7.13_32 = VEC_PACK_TRUNC_EXPR <...>;
+ vect__7.12_33 = VEC_PACK_TRUNC_EXPR <...>;
+ ...
+ .LEN_STORE (_14, 16B, _40, vect__7.12_33, 0);
+ ivtmp_39 = ivtmp_38 - _40;
+ ...
+ */
+ tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
+ tree step = LOOP_VINFO_DECREMENTING_IV_STEP (loop_vinfo);
+ gcc_assert (step);
+ vect_adjust_loop_lens_control (iv_type, &header_seq, rgc, step);
+ break;
+ }
}
/* Emit all accumulated statements. */
@@ -973,6 +973,8 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
vectorizable (false),
can_use_partial_vectors_p (param_vect_partial_vector_usage != 0),
using_partial_vectors_p (false),
+ using_decrementing_iv_p (false),
+ decrementing_iv_step (NULL_TREE),
epil_using_partial_vectors_p (false),
partial_load_store_bias (0),
peeling_for_gaps (false),
@@ -2725,6 +2727,17 @@ start_over:
&& !vect_verify_loop_lens (loop_vinfo))
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
+ /* If we're vectorizing a loop that uses length "controls" and
+ can iterate more than once, we apply the decrementing IV approach
+ in loop control. */
+ if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+ && !LOOP_VINFO_LENS (loop_vinfo).is_empty ()
+ && LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo) == 0
+ && !(LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ && known_le (LOOP_VINFO_INT_NITERS (loop_vinfo),
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo))))
+ LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo) = true;
+
/* If we're vectorizing an epilogue loop, the vectorized loop either needs
to be able to handle fewer than VF scalars, or needs to have a lower VF
than the main loop. */
@@ -818,6 +818,16 @@ public:
the vector loop can handle fewer than VF scalars. */
bool using_partial_vectors_p;
+ /* True if we've decided to use a decrementing loop control IV that counts
+ scalars. This can be done for any loop that:
+
+ (a) uses length "controls"; and
+ (b) can iterate more than once. */
+ bool using_decrementing_iv_p;
+
+ /* The variable amount step for decrement IV. */
+ tree decrementing_iv_step;
+
/* True if we've decided to use partially-populated vectors for the
epilogue of loop. */
bool epil_using_partial_vectors_p;
@@ -890,6 +900,8 @@ public:
#define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable
#define LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P(L) (L)->can_use_partial_vectors_p
#define LOOP_VINFO_USING_PARTIAL_VECTORS_P(L) (L)->using_partial_vectors_p
+#define LOOP_VINFO_USING_DECREMENTING_IV_P(L) (L)->using_decrementing_iv_p
+#define LOOP_VINFO_DECREMENTING_IV_STEP(L) (L)->decrementing_iv_step
#define LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P(L) \
(L)->epil_using_partial_vectors_p
#define LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS(L) (L)->partial_load_store_bias