[V2] VECT: Change flow of decrement IV
Checks
Commit Message
From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
Following Richi's suggestion, I changed the current decrement IV flow from:
do {
remain -= MIN (vf, remain);
} while (remain != 0);
into:
do {
old_remain = remain;
len = MIN (vf, remain);
remain -= vf;
} while (old_remain > vf);
to enhance SCEV.
Includes fixes from Kewen.
This patch needs to wait for Kewen's test feedback.
Testing on x86 is ongoing.
Co-authored-by: Kewen Lin <linkw@linux.ibm.com>
gcc/ChangeLog:
* tree-vect-loop-manip.cc (vect_set_loop_controls_directly): Change decrement IV flow.
(vect_set_loop_condition_partial_vectors): Ditto.
---
gcc/tree-vect-loop-manip.cc | 36 +++++++++++++++++++++++++-----------
1 file changed, 25 insertions(+), 11 deletions(-)
Comments
Bootstrapped and regression-tested on x86 with no unexpected differences.
Looking forward to Kewen's test report for this patch.
Thanks.
juzhe.zhong@rivai.ai
From: juzhe.zhong
Date: 2023-05-31 23:08
To: gcc-patches
CC: richard.sandiford; rguenther; linkw; Ju-Zhe Zhong
Subject: [PATCH V2] VECT: Change flow of decrement IV
From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
Following Richi's suggestion, I changed the current decrement IV flow from:
do {
remain -= MIN (vf, remain);
} while (remain != 0);
into:
do {
old_remain = remain;
len = MIN (vf, remain);
remain -= vf;
} while (old_remain > vf);
to enhance SCEV.
Includes fixes from Kewen.
This patch needs to wait for Kewen's test feedback.
Testing on x86 is ongoing.
Co-authored-by: Kewen Lin <linkw@linux.ibm.com>
gcc/ChangeLog:
* tree-vect-loop-manip.cc (vect_set_loop_controls_directly): Change decrement IV flow.
(vect_set_loop_condition_partial_vectors): Ditto.
---
gcc/tree-vect-loop-manip.cc | 36 +++++++++++++++++++++++++-----------
1 file changed, 25 insertions(+), 11 deletions(-)
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index acf3642ceb2..3f735945e67 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -483,7 +483,7 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
gimple_stmt_iterator loop_cond_gsi,
rgroup_controls *rgc, tree niters,
tree niters_skip, bool might_wrap_p,
- tree *iv_step)
+ tree *iv_step, tree *compare_step)
{
tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
@@ -538,9 +538,9 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
...
vect__4.8_28 = .LEN_LOAD (_17, 32B, _36, 0);
...
- ivtmp_35 = ivtmp_9 - _36;
+ ivtmp_35 = ivtmp_9 - POLY_INT_CST [4, 4];
...
- if (ivtmp_35 != 0)
+ if (ivtmp_9 > POLY_INT_CST [4, 4])
goto <bb 4>; [83.33%]
else
goto <bb 5>; [16.67%]
@@ -549,13 +549,15 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
tree step = rgc->controls.length () == 1 ? rgc->controls[0]
: make_ssa_name (iv_type);
/* Create decrement IV. */
- create_iv (nitems_total, MINUS_EXPR, step, NULL_TREE, loop, &incr_gsi,
- insert_after, &index_before_incr, &index_after_incr);
+ create_iv (nitems_total, MINUS_EXPR, nitems_step, NULL_TREE, loop,
+ &incr_gsi, insert_after, &index_before_incr,
+ &index_after_incr);
gimple_seq_add_stmt (header_seq, gimple_build_assign (step, MIN_EXPR,
index_before_incr,
nitems_step));
*iv_step = step;
- return index_after_incr;
+ *compare_step = nitems_step;
+ return index_before_incr;
}
/* Create increment IV. */
@@ -825,6 +827,7 @@ vect_set_loop_condition_partial_vectors (class loop *loop,
arbitrarily pick the last. */
tree test_ctrl = NULL_TREE;
tree iv_step = NULL_TREE;
+ tree compare_step = NULL_TREE;
rgroup_controls *rgc;
rgroup_controls *iv_rgc = nullptr;
unsigned int i;
@@ -861,7 +864,7 @@ vect_set_loop_condition_partial_vectors (class loop *loop,
&preheader_seq, &header_seq,
loop_cond_gsi, rgc, niters,
niters_skip, might_wrap_p,
- &iv_step);
+ &iv_step, &compare_step);
iv_rgc = rgc;
}
@@ -884,10 +887,21 @@ vect_set_loop_condition_partial_vectors (class loop *loop,
/* Get a boolean result that tells us whether to iterate. */
edge exit_edge = single_exit (loop);
- tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
- tree zero_ctrl = build_zero_cst (TREE_TYPE (test_ctrl));
- gcond *cond_stmt = gimple_build_cond (code, test_ctrl, zero_ctrl,
- NULL_TREE, NULL_TREE);
+ gcond *cond_stmt;
+ if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
+ {
+ gcc_assert (compare_step);
+ tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR;
+ cond_stmt = gimple_build_cond (code, test_ctrl, compare_step, NULL_TREE,
+ NULL_TREE);
+ }
+ else
+ {
+ tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
+ tree zero_ctrl = build_zero_cst (TREE_TYPE (test_ctrl));
+ cond_stmt
+ = gimple_build_cond (code, test_ctrl, zero_ctrl, NULL_TREE, NULL_TREE);
+ }
gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
/* The loop iterates (NITERS - 1) / VF + 1 times.
--
2.36.3
Hi Juzhe,
on 2023/6/1 08:31, juzhe.zhong@rivai.ai wrote:
> Bootstrapped and Regression on X86 no surprise different.
>
> Looking forward Kewen's test report for this patch.
>
This patch can be bootstrapped and regress-tested on
powerpc64-linux-gnu P9 and powerpc64le-linux-gnu P9/P10.
Also SPEC2017 int/fp bmks build and run successfully
with it on powerpc64le-linux-gnu P10 (with an explicit
parameter --param=vect-partial-vector-usage=2).
It fixes the 510.parest_r -5% degradation, and it sped up
525.x264_r by +1%, 521.wrf_r by +2.03%, 544.nab_r by +1.27% and
549.fotonik3d_r by +3.22%, but it degraded 503.bwaves_r by -4%.  We have
some heuristics on loads and load percentage for 503.bwaves_r on Power,
and I suspect they are related.  Considering that vect-partial-vector-usage=2
isn't the default on Power, and that this fixes the exposed failures and the
parest_r degradation, I think the bwaves_r degradation should not block this.
I'll take a further look at the bwaves_r degradation later and open a PR
if it's an actual issue rather than just the costing heuristics having
no effect.
btw, it would be better to add one PR marker line to associate
this with PR109971, something like:
PR tree-optimization/109971
Thanks!
BR,
Kewen
> Thanks.
> ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
> juzhe.zhong@rivai.ai
>
>
> *From:* juzhe.zhong <mailto:juzhe.zhong@rivai.ai>
> *Date:* 2023-05-31 23:08
> *To:* gcc-patches <mailto:gcc-patches@gcc.gnu.org>
> *CC:* richard.sandiford <mailto:richard.sandiford@arm.com>; rguenther <mailto:rguenther@suse.de>; linkw <mailto:linkw@linux.ibm.com>; Ju-Zhe Zhong <mailto:juzhe.zhong@rivai.ai>
> *Subject:* [PATCH V2] VECT: Change flow of decrement IV
> From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
>
> Follow Richi's suggestion, I change current decrement IV flow from:
>
> do {
> remain -= MIN (vf, remain);
> } while (remain != 0);
>
> into:
>
> do {
> old_remain = remain;
> len = MIN (vf, remain);
> remain -= vf;
> } while (old_remain >= vf);
>
> to enhance SCEV.
>
> Include fixes from kewen.
>
>
> This patch will need to wait for Kewen's test feedback.
>
> Testing on X86 is on-going
>
> Co-Authored by: Kewen Lin <linkw@linux.ibm.com>
>
> gcc/ChangeLog:
>
> * tree-vect-loop-manip.cc (vect_set_loop_controls_directly): Change decrement IV flow.
> (vect_set_loop_condition_partial_vectors): Ditto.
>
> ---
> gcc/tree-vect-loop-manip.cc | 36 +++++++++++++++++++++++++-----------
> 1 file changed, 25 insertions(+), 11 deletions(-)
>
> diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
> index acf3642ceb2..3f735945e67 100644
> --- a/gcc/tree-vect-loop-manip.cc
> +++ b/gcc/tree-vect-loop-manip.cc
> @@ -483,7 +483,7 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
> gimple_stmt_iterator loop_cond_gsi,
> rgroup_controls *rgc, tree niters,
> tree niters_skip, bool might_wrap_p,
> - tree *iv_step)
> + tree *iv_step, tree *compare_step)
> {
> tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
> tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
> @@ -538,9 +538,9 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
> ...
> vect__4.8_28 = .LEN_LOAD (_17, 32B, _36, 0);
> ...
> - ivtmp_35 = ivtmp_9 - _36;
> + ivtmp_35 = ivtmp_9 - POLY_INT_CST [4, 4];
> ...
> - if (ivtmp_35 != 0)
> + if (ivtmp_9 > POLY_INT_CST [4, 4])
> goto <bb 4>; [83.33%]
> else
> goto <bb 5>; [16.67%]
> @@ -549,13 +549,15 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
> tree step = rgc->controls.length () == 1 ? rgc->controls[0]
> : make_ssa_name (iv_type);
> /* Create decrement IV. */
> - create_iv (nitems_total, MINUS_EXPR, step, NULL_TREE, loop, &incr_gsi,
> - insert_after, &index_before_incr, &index_after_incr);
> + create_iv (nitems_total, MINUS_EXPR, nitems_step, NULL_TREE, loop,
> + &incr_gsi, insert_after, &index_before_incr,
> + &index_after_incr);
> gimple_seq_add_stmt (header_seq, gimple_build_assign (step, MIN_EXPR,
> index_before_incr,
> nitems_step));
> *iv_step = step;
> - return index_after_incr;
> + *compare_step = nitems_step;
> + return index_before_incr;
> }
> /* Create increment IV. */
> @@ -825,6 +827,7 @@ vect_set_loop_condition_partial_vectors (class loop *loop,
> arbitrarily pick the last. */
> tree test_ctrl = NULL_TREE;
> tree iv_step = NULL_TREE;
> + tree compare_step = NULL_TREE;
> rgroup_controls *rgc;
> rgroup_controls *iv_rgc = nullptr;
> unsigned int i;
> @@ -861,7 +864,7 @@ vect_set_loop_condition_partial_vectors (class loop *loop,
> &preheader_seq, &header_seq,
> loop_cond_gsi, rgc, niters,
> niters_skip, might_wrap_p,
> - &iv_step);
> + &iv_step, &compare_step);
> iv_rgc = rgc;
> }
> @@ -884,10 +887,21 @@ vect_set_loop_condition_partial_vectors (class loop *loop,
> /* Get a boolean result that tells us whether to iterate. */
> edge exit_edge = single_exit (loop);
> - tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
> - tree zero_ctrl = build_zero_cst (TREE_TYPE (test_ctrl));
> - gcond *cond_stmt = gimple_build_cond (code, test_ctrl, zero_ctrl,
> - NULL_TREE, NULL_TREE);
> + gcond *cond_stmt;
> + if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
> + {
> + gcc_assert (compare_step);
> + tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR;
> + cond_stmt = gimple_build_cond (code, test_ctrl, compare_step, NULL_TREE,
> + NULL_TREE);
> + }
> + else
> + {
> + tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
> + tree zero_ctrl = build_zero_cst (TREE_TYPE (test_ctrl));
> + cond_stmt
> + = gimple_build_cond (code, test_ctrl, zero_ctrl, NULL_TREE, NULL_TREE);
> + }
> gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
> /* The loop iterates (NITERS - 1) / VF + 1 times.
> --
> 2.36.3
>
>
Thanks, Kewen.
I have sent the V3 patch. Could you comment on it?
I want to make sure you support that patch.
Thanks.
juzhe.zhong@rivai.ai
From: Kewen.Lin
Date: 2023-06-01 12:32
To: juzhe.zhong@rivai.ai
CC: richard.sandiford; rguenther; gcc-patches
Subject: Re: [PATCH V2] VECT: Change flow of decrement IV
Hi Juzhe,
on 2023/6/1 08:31, juzhe.zhong@rivai.ai wrote:
> Bootstrapped and Regression on X86 no surprise different.
>
> Looking forward Kewen's test report for this patch.
>
This patch can be bootstrapped and regress-tested on
powerpc64-linux-gnu P9 and powerpc64le-linux-gnu P9/P10.
Also SPEC2017 int/fp bmks build and run successfully
with it on powerpc64le-linux-gnu P10 (with an explicit
parameter --param=vect-partial-vector-usage=2).
It fixes the 510.parest_r -5% degradation, and it sped up
525.x264_r by +1%, 521.wrf_r by +2.03%, 544.nab_r by +1.27% and
549.fotonik3d_r by +3.22%, but it degraded 503.bwaves_r by -4%.  We have
some heuristics on loads and load percentage for 503.bwaves_r on Power,
and I suspect they are related.  Considering that vect-partial-vector-usage=2
isn't the default on Power, and that this fixes the exposed failures and the
parest_r degradation, I think the bwaves_r degradation should not block this.
I'll take a further look at the bwaves_r degradation later and open a PR
if it's an actual issue rather than just the costing heuristics having
no effect.
btw, it would be better to add one PR marker line to associate
this with PR109971, something like:
PR tree-optimization/109971
Thanks!
BR,
Kewen
> Thanks.
> ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
> juzhe.zhong@rivai.ai
>
>
> *From:* juzhe.zhong <mailto:juzhe.zhong@rivai.ai>
> *Date:* 2023-05-31 23:08
> *To:* gcc-patches <mailto:gcc-patches@gcc.gnu.org>
> *CC:* richard.sandiford <mailto:richard.sandiford@arm.com>; rguenther <mailto:rguenther@suse.de>; linkw <mailto:linkw@linux.ibm.com>; Ju-Zhe Zhong <mailto:juzhe.zhong@rivai.ai>
> *Subject:* [PATCH V2] VECT: Change flow of decrement IV
> From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
>
> Follow Richi's suggestion, I change current decrement IV flow from:
>
> do {
> remain -= MIN (vf, remain);
> } while (remain != 0);
>
> into:
>
> do {
> old_remain = remain;
> len = MIN (vf, remain);
> remain -= vf;
> } while (old_remain >= vf);
>
> to enhance SCEV.
>
> Include fixes from kewen.
>
>
> This patch will need to wait for Kewen's test feedback.
>
> Testing on X86 is on-going
>
> Co-Authored by: Kewen Lin <linkw@linux.ibm.com>
>
> gcc/ChangeLog:
>
> * tree-vect-loop-manip.cc (vect_set_loop_controls_directly): Change decrement IV flow.
> (vect_set_loop_condition_partial_vectors): Ditto.
>
> ---
> gcc/tree-vect-loop-manip.cc | 36 +++++++++++++++++++++++++-----------
> 1 file changed, 25 insertions(+), 11 deletions(-)
>
> diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
> index acf3642ceb2..3f735945e67 100644
> --- a/gcc/tree-vect-loop-manip.cc
> +++ b/gcc/tree-vect-loop-manip.cc
> @@ -483,7 +483,7 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
> gimple_stmt_iterator loop_cond_gsi,
> rgroup_controls *rgc, tree niters,
> tree niters_skip, bool might_wrap_p,
> - tree *iv_step)
> + tree *iv_step, tree *compare_step)
> {
> tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
> tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
> @@ -538,9 +538,9 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
> ...
> vect__4.8_28 = .LEN_LOAD (_17, 32B, _36, 0);
> ...
> - ivtmp_35 = ivtmp_9 - _36;
> + ivtmp_35 = ivtmp_9 - POLY_INT_CST [4, 4];
> ...
> - if (ivtmp_35 != 0)
> + if (ivtmp_9 > POLY_INT_CST [4, 4])
> goto <bb 4>; [83.33%]
> else
> goto <bb 5>; [16.67%]
> @@ -549,13 +549,15 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
> tree step = rgc->controls.length () == 1 ? rgc->controls[0]
> : make_ssa_name (iv_type);
> /* Create decrement IV. */
> - create_iv (nitems_total, MINUS_EXPR, step, NULL_TREE, loop, &incr_gsi,
> - insert_after, &index_before_incr, &index_after_incr);
> + create_iv (nitems_total, MINUS_EXPR, nitems_step, NULL_TREE, loop,
> + &incr_gsi, insert_after, &index_before_incr,
> + &index_after_incr);
> gimple_seq_add_stmt (header_seq, gimple_build_assign (step, MIN_EXPR,
> index_before_incr,
> nitems_step));
> *iv_step = step;
> - return index_after_incr;
> + *compare_step = nitems_step;
> + return index_before_incr;
> }
> /* Create increment IV. */
> @@ -825,6 +827,7 @@ vect_set_loop_condition_partial_vectors (class loop *loop,
> arbitrarily pick the last. */
> tree test_ctrl = NULL_TREE;
> tree iv_step = NULL_TREE;
> + tree compare_step = NULL_TREE;
> rgroup_controls *rgc;
> rgroup_controls *iv_rgc = nullptr;
> unsigned int i;
> @@ -861,7 +864,7 @@ vect_set_loop_condition_partial_vectors (class loop *loop,
> &preheader_seq, &header_seq,
> loop_cond_gsi, rgc, niters,
> niters_skip, might_wrap_p,
> - &iv_step);
> + &iv_step, &compare_step);
> iv_rgc = rgc;
> }
> @@ -884,10 +887,21 @@ vect_set_loop_condition_partial_vectors (class loop *loop,
> /* Get a boolean result that tells us whether to iterate. */
> edge exit_edge = single_exit (loop);
> - tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
> - tree zero_ctrl = build_zero_cst (TREE_TYPE (test_ctrl));
> - gcond *cond_stmt = gimple_build_cond (code, test_ctrl, zero_ctrl,
> - NULL_TREE, NULL_TREE);
> + gcond *cond_stmt;
> + if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
> + {
> + gcc_assert (compare_step);
> + tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR;
> + cond_stmt = gimple_build_cond (code, test_ctrl, compare_step, NULL_TREE,
> + NULL_TREE);
> + }
> + else
> + {
> + tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
> + tree zero_ctrl = build_zero_cst (TREE_TYPE (test_ctrl));
> + cond_stmt
> + = gimple_build_cond (code, test_ctrl, zero_ctrl, NULL_TREE, NULL_TREE);
> + }
> gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
> /* The loop iterates (NITERS - 1) / VF + 1 times.
> --
> 2.36.3
>
>
@@ -483,7 +483,7 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
gimple_stmt_iterator loop_cond_gsi,
rgroup_controls *rgc, tree niters,
tree niters_skip, bool might_wrap_p,
- tree *iv_step)
+ tree *iv_step, tree *compare_step)
{
tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
@@ -538,9 +538,9 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
...
vect__4.8_28 = .LEN_LOAD (_17, 32B, _36, 0);
...
- ivtmp_35 = ivtmp_9 - _36;
+ ivtmp_35 = ivtmp_9 - POLY_INT_CST [4, 4];
...
- if (ivtmp_35 != 0)
+ if (ivtmp_9 > POLY_INT_CST [4, 4])
goto <bb 4>; [83.33%]
else
goto <bb 5>; [16.67%]
@@ -549,13 +549,15 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
tree step = rgc->controls.length () == 1 ? rgc->controls[0]
: make_ssa_name (iv_type);
/* Create decrement IV. */
- create_iv (nitems_total, MINUS_EXPR, step, NULL_TREE, loop, &incr_gsi,
- insert_after, &index_before_incr, &index_after_incr);
+ create_iv (nitems_total, MINUS_EXPR, nitems_step, NULL_TREE, loop,
+ &incr_gsi, insert_after, &index_before_incr,
+ &index_after_incr);
gimple_seq_add_stmt (header_seq, gimple_build_assign (step, MIN_EXPR,
index_before_incr,
nitems_step));
*iv_step = step;
- return index_after_incr;
+ *compare_step = nitems_step;
+ return index_before_incr;
}
/* Create increment IV. */
@@ -825,6 +827,7 @@ vect_set_loop_condition_partial_vectors (class loop *loop,
arbitrarily pick the last. */
tree test_ctrl = NULL_TREE;
tree iv_step = NULL_TREE;
+ tree compare_step = NULL_TREE;
rgroup_controls *rgc;
rgroup_controls *iv_rgc = nullptr;
unsigned int i;
@@ -861,7 +864,7 @@ vect_set_loop_condition_partial_vectors (class loop *loop,
&preheader_seq, &header_seq,
loop_cond_gsi, rgc, niters,
niters_skip, might_wrap_p,
- &iv_step);
+ &iv_step, &compare_step);
iv_rgc = rgc;
}
@@ -884,10 +887,21 @@ vect_set_loop_condition_partial_vectors (class loop *loop,
/* Get a boolean result that tells us whether to iterate. */
edge exit_edge = single_exit (loop);
- tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
- tree zero_ctrl = build_zero_cst (TREE_TYPE (test_ctrl));
- gcond *cond_stmt = gimple_build_cond (code, test_ctrl, zero_ctrl,
- NULL_TREE, NULL_TREE);
+ gcond *cond_stmt;
+ if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
+ {
+ gcc_assert (compare_step);
+ tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR;
+ cond_stmt = gimple_build_cond (code, test_ctrl, compare_step, NULL_TREE,
+ NULL_TREE);
+ }
+ else
+ {
+ tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
+ tree zero_ctrl = build_zero_cst (TREE_TYPE (test_ctrl));
+ cond_stmt
+ = gimple_build_cond (code, test_ctrl, zero_ctrl, NULL_TREE, NULL_TREE);
+ }
gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
/* The loop iterates (NITERS - 1) / VF + 1 times.