Middle-end: Fix bug of induction variable vectorization for RVV
Checks
Commit Message
PR: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112438
The SELECT_VL result is not necessarily VF in non-final iterations.
Current GIMPLE IR is wrong:
# vect_vec_iv_.21_25 = PHI <_24(4), { 0, 1, 2, ... }(3)>
...
_24 = vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... };
After this patch which is correct for SELECT_VL:
# vect_vec_iv_.8_22 = PHI <_21(4), { 0, 1, 2, ... }(3)>
...
_35 = .SELECT_VL (ivtmp_33, POLY_INT_CST [4, 4]);
_21 = vect_vec_iv_.8_22 + { POLY_INT_CST [4, 4], ... };
kito, could you give more explanation ?
PR middle/112438
gcc/ChangeLog:
* tree-vect-loop.cc (vectorizable_induction): Fix bug.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/pr112438.c: New test.
---
.../gcc.target/riscv/rvv/autovec/pr112438.c | 35 +++++++++++++++++
gcc/tree-vect-loop.cc | 39 +++++++++++++++----
2 files changed, 67 insertions(+), 7 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
Comments
Sorry for the wrong description in the log.
After this patch, the IR is:
_36 = .SELECT_VL (ivtmp_34, POLY_INT_CST [4, 4]);
_22 = (int) _36;
vect_cst__21 = [vec_duplicate_expr] _22;
juzhe.zhong@rivai.ai
From: Juzhe-Zhong
Date: 2023-11-08 18:53
To: gcc-patches
CC: richard.sandiford; rguenther; kito.cheng; kito.cheng; Juzhe-Zhong
Subject: [PATCH] Middle-end: Fix bug of induction variable vectorization for RVV
PR: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112438
The SELECT_VL result is not necessarily VF in non-final iterations.
Current GIMPLE IR is wrong:
# vect_vec_iv_.21_25 = PHI <_24(4), { 0, 1, 2, ... }(3)>
...
_24 = vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... };
After this patch which is correct for SELECT_VL:
# vect_vec_iv_.8_22 = PHI <_21(4), { 0, 1, 2, ... }(3)>
...
_35 = .SELECT_VL (ivtmp_33, POLY_INT_CST [4, 4]);
_21 = vect_vec_iv_.8_22 + { POLY_INT_CST [4, 4], ... };
kito, could you give more explanation ?
PR middle/112438
gcc/ChangeLog:
* tree-vect-loop.cc (vectorizable_induction): Fix bug.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/pr112438.c: New test.
---
.../gcc.target/riscv/rvv/autovec/pr112438.c | 35 +++++++++++++++++
gcc/tree-vect-loop.cc | 39 +++++++++++++++----
2 files changed, 67 insertions(+), 7 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
new file mode 100644
index 00000000000..b326d56a52c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-vect-cost-model -ffast-math -fdump-tree-optimized-details" } */
+
+void
+foo (int n, int *__restrict in, int *__restrict out)
+{
+ for (int i = 0; i < n; i += 1)
+ {
+ out[i] = in[i] + i;
+ }
+}
+
+void
+foo2 (int n, float * __restrict in,
+float * __restrict out)
+{
+ for (int i = 0; i < n; i += 1)
+ {
+ out[i] = in[i] + i;
+ }
+}
+
+void
+foo3 (int n, float * __restrict in,
+float * __restrict out, float x)
+{
+ for (int i = 0; i < n; i += 1)
+ {
+ out[i] = in[i] + i* i;
+ }
+}
+
+/* We don't want to see vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... }. */
+/* { dg-final { scan-tree-dump-not "\\+ \{ POLY_INT_CST" "optimized" } } */
+
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index a544bc9b059..3e103946168 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -10309,10 +10309,30 @@ vectorizable_induction (loop_vec_info loop_vinfo,
new_name = step_expr;
else
{
+ gimple_seq seq = NULL;
+ if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
+ {
+ /* When we're using loop_len produced by SELECT_VL, the non-final
+ iterations are not always processing VF elements. So vectorize
+ induction variable instead of
+
+ _21 = vect_vec_iv_.6_22 + { VF, ... };
+
+ We should generate:
+
+ _35 = .SELECT_VL (ivtmp_33, VF);
+ vect_cst__22 = [vec_duplicate_expr] _35;
+ _21 = vect_vec_iv_.6_22 + vect_cst__22; */
+ vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
+ tree len
+ = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
+ expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
+ unshare_expr (len)),
+ &seq, true, NULL_TREE);
+ }
/* iv_loop is the loop to be vectorized. Generate:
vec_step = [VF*S, VF*S, VF*S, VF*S] */
- gimple_seq seq = NULL;
- if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
+ else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
{
expr = build_int_cst (integer_type_node, vf);
expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
@@ -10323,8 +10343,13 @@ vectorizable_induction (loop_vec_info loop_vinfo,
expr, step_expr);
if (seq)
{
- new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
- gcc_assert (!new_bb);
+ if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
+ gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
+ else
+ {
+ new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
+ gcc_assert (!new_bb);
+ }
}
}
@@ -10332,9 +10357,9 @@ vectorizable_induction (loop_vec_info loop_vinfo,
gcc_assert (CONSTANT_CLASS_P (new_name)
|| TREE_CODE (new_name) == SSA_NAME);
new_vec = build_vector_from_val (step_vectype, t);
- vec_step = vect_init_vector (loop_vinfo, stmt_info,
- new_vec, step_vectype, NULL);
-
+ vec_step
+ = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype,
+ LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? &si : NULL);
/* Create the following def-use cycle:
loop prolog:
--
2.36.3
Bootstrap and regression testing on x86 passed.
OK for trunk?
juzhe.zhong@rivai.ai
From: Juzhe-Zhong
Date: 2023-11-08 18:53
To: gcc-patches
CC: richard.sandiford; rguenther; kito.cheng; kito.cheng; Juzhe-Zhong
Subject: [PATCH] Middle-end: Fix bug of induction variable vectorization for RVV
PR: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112438
The SELECT_VL result is not necessarily VF in non-final iterations.
Current GIMPLE IR is wrong:
# vect_vec_iv_.21_25 = PHI <_24(4), { 0, 1, 2, ... }(3)>
...
_24 = vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... };
After this patch which is correct for SELECT_VL:
# vect_vec_iv_.8_22 = PHI <_21(4), { 0, 1, 2, ... }(3)>
...
_35 = .SELECT_VL (ivtmp_33, POLY_INT_CST [4, 4]);
_21 = vect_vec_iv_.8_22 + { POLY_INT_CST [4, 4], ... };
kito, could you give more explanation ?
PR middle/112438
gcc/ChangeLog:
* tree-vect-loop.cc (vectorizable_induction): Fix bug.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/pr112438.c: New test.
---
.../gcc.target/riscv/rvv/autovec/pr112438.c | 35 +++++++++++++++++
gcc/tree-vect-loop.cc | 39 +++++++++++++++----
2 files changed, 67 insertions(+), 7 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
new file mode 100644
index 00000000000..b326d56a52c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-vect-cost-model -ffast-math -fdump-tree-optimized-details" } */
+
+void
+foo (int n, int *__restrict in, int *__restrict out)
+{
+ for (int i = 0; i < n; i += 1)
+ {
+ out[i] = in[i] + i;
+ }
+}
+
+void
+foo2 (int n, float * __restrict in,
+float * __restrict out)
+{
+ for (int i = 0; i < n; i += 1)
+ {
+ out[i] = in[i] + i;
+ }
+}
+
+void
+foo3 (int n, float * __restrict in,
+float * __restrict out, float x)
+{
+ for (int i = 0; i < n; i += 1)
+ {
+ out[i] = in[i] + i* i;
+ }
+}
+
+/* We don't want to see vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... }. */
+/* { dg-final { scan-tree-dump-not "\\+ \{ POLY_INT_CST" "optimized" } } */
+
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index a544bc9b059..3e103946168 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -10309,10 +10309,30 @@ vectorizable_induction (loop_vec_info loop_vinfo,
new_name = step_expr;
else
{
+ gimple_seq seq = NULL;
+ if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
+ {
+ /* When we're using loop_len produced by SELECT_VL, the non-final
+ iterations are not always processing VF elements. So vectorize
+ induction variable instead of
+
+ _21 = vect_vec_iv_.6_22 + { VF, ... };
+
+ We should generate:
+
+ _35 = .SELECT_VL (ivtmp_33, VF);
+ vect_cst__22 = [vec_duplicate_expr] _35;
+ _21 = vect_vec_iv_.6_22 + vect_cst__22; */
+ vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
+ tree len
+ = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
+ expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
+ unshare_expr (len)),
+ &seq, true, NULL_TREE);
+ }
/* iv_loop is the loop to be vectorized. Generate:
vec_step = [VF*S, VF*S, VF*S, VF*S] */
- gimple_seq seq = NULL;
- if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
+ else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
{
expr = build_int_cst (integer_type_node, vf);
expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
@@ -10323,8 +10343,13 @@ vectorizable_induction (loop_vec_info loop_vinfo,
expr, step_expr);
if (seq)
{
- new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
- gcc_assert (!new_bb);
+ if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
+ gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
+ else
+ {
+ new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
+ gcc_assert (!new_bb);
+ }
}
}
@@ -10332,9 +10357,9 @@ vectorizable_induction (loop_vec_info loop_vinfo,
gcc_assert (CONSTANT_CLASS_P (new_name)
|| TREE_CODE (new_name) == SSA_NAME);
new_vec = build_vector_from_val (step_vectype, t);
- vec_step = vect_init_vector (loop_vinfo, stmt_info,
- new_vec, step_vectype, NULL);
-
+ vec_step
+ = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype,
+ LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? &si : NULL);
/* Create the following def-use cycle:
loop prolog:
--
2.36.3
On Wed, Nov 8, 2023 at 11:53 AM Juzhe-Zhong <juzhe.zhong@rivai.ai> wrote:
>
> PR: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112438
>
> SELECT_VL result is not necessary always VF in non-final iteration.
>
> Current GIMPLE IR is wrong:
>
> # vect_vec_iv_.21_25 = PHI <_24(4), { 0, 1, 2, ... }(3)>
> ...
> _24 = vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... };
>
> After this patch which is correct for SELECT_VL:
>
> # vect_vec_iv_.8_22 = PHI <_21(4), { 0, 1, 2, ... }(3)>
> ...
> _35 = .SELECT_VL (ivtmp_33, POLY_INT_CST [4, 4]);
> _21 = vect_vec_iv_.8_22 + { POLY_INT_CST [4, 4], ... };
>
> kito, could you give more explanation ?
>
> PR middle/112438
>
> gcc/ChangeLog:
>
> * tree-vect-loop.cc (vectorizable_induction): Fix bug.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/autovec/pr112438.c: New test.
>
> ---
> .../gcc.target/riscv/rvv/autovec/pr112438.c | 35 +++++++++++++++++
> gcc/tree-vect-loop.cc | 39 +++++++++++++++----
> 2 files changed, 67 insertions(+), 7 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
>
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
> new file mode 100644
> index 00000000000..b326d56a52c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
> @@ -0,0 +1,35 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-vect-cost-model -ffast-math -fdump-tree-optimized-details" } */
> +
> +void
> +foo (int n, int *__restrict in, int *__restrict out)
> +{
> + for (int i = 0; i < n; i += 1)
> + {
> + out[i] = in[i] + i;
> + }
> +}
> +
> +void
> +foo2 (int n, float * __restrict in,
> +float * __restrict out)
> +{
> + for (int i = 0; i < n; i += 1)
> + {
> + out[i] = in[i] + i;
> + }
> +}
> +
> +void
> +foo3 (int n, float * __restrict in,
> +float * __restrict out, float x)
> +{
> + for (int i = 0; i < n; i += 1)
> + {
> + out[i] = in[i] + i* i;
> + }
> +}
> +
> +/* We don't want to see vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... }. */
> +/* { dg-final { scan-tree-dump-not "\\+ \{ POLY_INT_CST" "optimized" } } */
> +
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index a544bc9b059..3e103946168 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -10309,10 +10309,30 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> new_name = step_expr;
> else
> {
> + gimple_seq seq = NULL;
> + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> + {
> + /* When we're using loop_len produced by SELEC_VL, the non-final
> + iterations are not always processing VF elements. So vectorize
> + induction variable instead of
> +
> + _21 = vect_vec_iv_.6_22 + { VF, ... };
> +
> + We should generate:
> +
> + _35 = .SELECT_VL (ivtmp_33, VF);
> + vect_cst__22 = [vec_duplicate_expr] _35;
> + _21 = vect_vec_iv_.6_22 + vect_cst__22; */
> + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> + tree len
> + = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
> + expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
> + unshare_expr (len)),
> + &seq, true, NULL_TREE);
> + }
I think it would be better to split out building a tree from VF from both
arms and avoid using 'vf' when LOOP_VINFO_USING_SELECT_VL_P.
Btw, you are not patching the SLP path here which I believe has the same
problem but is currently exempt from non-constant VF at least.
Richard.
> /* iv_loop is the loop to be vectorized. Generate:
> vec_step = [VF*S, VF*S, VF*S, VF*S] */
> - gimple_seq seq = NULL;
> - if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> + else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> {
> expr = build_int_cst (integer_type_node, vf);
> expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
> @@ -10323,8 +10343,13 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> expr, step_expr);
> if (seq)
> {
> - new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> - gcc_assert (!new_bb);
> + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> + gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
> + else
> + {
> + new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> + gcc_assert (!new_bb);
> + }
> }
> }
>
> @@ -10332,9 +10357,9 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> gcc_assert (CONSTANT_CLASS_P (new_name)
> || TREE_CODE (new_name) == SSA_NAME);
> new_vec = build_vector_from_val (step_vectype, t);
> - vec_step = vect_init_vector (loop_vinfo, stmt_info,
> - new_vec, step_vectype, NULL);
> -
> + vec_step
> + = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype,
> + LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? &si : NULL);
>
> /* Create the following def-use cycle:
> loop prolog:
> --
> 2.36.3
>
Hi, Richard.
>> I think it would be better to split out building a tree from VF from both
>> arms and avoid using 'vf' when LOOP_VINFO_USING_SELECT_VL_P.
Ok.
>> Btw, you are not patching the SLP path here which I believe has the same
>> problem but is currently exempt from non-constant VF at least.
IMHO, we won't have this issue with SLP: SELECT_VL is only enabled for non-SLP (see the `!slp` check below), so SLP always uses MIN_EXPR to calculate the length.
if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
{
tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
if (direct_internal_fn_supported_p (IFN_SELECT_VL, iv_type,
OPTIMIZE_FOR_SPEED)
&& LOOP_VINFO_LENS (loop_vinfo).length () == 1
&& LOOP_VINFO_LENS (loop_vinfo)[0].factor == 1 && !slp
&& (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
|| !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ()))
LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) = true;
}
The problem is that SELECT_VL may produce a length other than VF in a non-final iteration, whereas MIN_EXPR always yields VF in non-final iterations.
Maybe we could add an assertion that SLP is not used when select_vl_p is true? Is that reasonable?
juzhe.zhong@rivai.ai
From: Richard Biener
Date: 2023-11-09 20:16
To: Juzhe-Zhong
CC: gcc-patches; richard.sandiford; rguenther; kito.cheng; kito.cheng
Subject: Re: [PATCH] Middle-end: Fix bug of induction variable vectorization for RVV
On Wed, Nov 8, 2023 at 11:53 AM Juzhe-Zhong <juzhe.zhong@rivai.ai> wrote:
>
> PR: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112438
>
> SELECT_VL result is not necessary always VF in non-final iteration.
>
> Current GIMPLE IR is wrong:
>
> # vect_vec_iv_.21_25 = PHI <_24(4), { 0, 1, 2, ... }(3)>
> ...
> _24 = vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... };
>
> After this patch which is correct for SELECT_VL:
>
> # vect_vec_iv_.8_22 = PHI <_21(4), { 0, 1, 2, ... }(3)>
> ...
> _35 = .SELECT_VL (ivtmp_33, POLY_INT_CST [4, 4]);
> _21 = vect_vec_iv_.8_22 + { POLY_INT_CST [4, 4], ... };
>
> kito, could you give more explanation ?
>
> PR middle/112438
>
> gcc/ChangeLog:
>
> * tree-vect-loop.cc (vectorizable_induction): Fix bug.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/autovec/pr112438.c: New test.
>
> ---
> .../gcc.target/riscv/rvv/autovec/pr112438.c | 35 +++++++++++++++++
> gcc/tree-vect-loop.cc | 39 +++++++++++++++----
> 2 files changed, 67 insertions(+), 7 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
>
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
> new file mode 100644
> index 00000000000..b326d56a52c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
> @@ -0,0 +1,35 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-vect-cost-model -ffast-math -fdump-tree-optimized-details" } */
> +
> +void
> +foo (int n, int *__restrict in, int *__restrict out)
> +{
> + for (int i = 0; i < n; i += 1)
> + {
> + out[i] = in[i] + i;
> + }
> +}
> +
> +void
> +foo2 (int n, float * __restrict in,
> +float * __restrict out)
> +{
> + for (int i = 0; i < n; i += 1)
> + {
> + out[i] = in[i] + i;
> + }
> +}
> +
> +void
> +foo3 (int n, float * __restrict in,
> +float * __restrict out, float x)
> +{
> + for (int i = 0; i < n; i += 1)
> + {
> + out[i] = in[i] + i* i;
> + }
> +}
> +
> +/* We don't want to see vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... }. */
> +/* { dg-final { scan-tree-dump-not "\\+ \{ POLY_INT_CST" "optimized" } } */
> +
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index a544bc9b059..3e103946168 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -10309,10 +10309,30 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> new_name = step_expr;
> else
> {
> + gimple_seq seq = NULL;
> + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> + {
> + /* When we're using loop_len produced by SELEC_VL, the non-final
> + iterations are not always processing VF elements. So vectorize
> + induction variable instead of
> +
> + _21 = vect_vec_iv_.6_22 + { VF, ... };
> +
> + We should generate:
> +
> + _35 = .SELECT_VL (ivtmp_33, VF);
> + vect_cst__22 = [vec_duplicate_expr] _35;
> + _21 = vect_vec_iv_.6_22 + vect_cst__22; */
> + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> + tree len
> + = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
> + expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
> + unshare_expr (len)),
> + &seq, true, NULL_TREE);
> + }
I think it would be better to split out building a tree from VF from both
arms and avoid using 'vf' when LOOP_VINFO_USING_SELECT_VL_P.
Btw, you are not patching the SLP path here which I believe has the same
problem but is currently exempt from non-constant VF at least.
Richard.
> /* iv_loop is the loop to be vectorized. Generate:
> vec_step = [VF*S, VF*S, VF*S, VF*S] */
> - gimple_seq seq = NULL;
> - if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> + else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> {
> expr = build_int_cst (integer_type_node, vf);
> expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
> @@ -10323,8 +10343,13 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> expr, step_expr);
> if (seq)
> {
> - new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> - gcc_assert (!new_bb);
> + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> + gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
> + else
> + {
> + new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> + gcc_assert (!new_bb);
> + }
> }
> }
>
> @@ -10332,9 +10357,9 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> gcc_assert (CONSTANT_CLASS_P (new_name)
> || TREE_CODE (new_name) == SSA_NAME);
> new_vec = build_vector_from_val (step_vectype, t);
> - vec_step = vect_init_vector (loop_vinfo, stmt_info,
> - new_vec, step_vectype, NULL);
> -
> + vec_step
> + = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype,
> + LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? &si : NULL);
>
> /* Create the following def-use cycle:
> loop prolog:
> --
> 2.36.3
>
Hi, Richard.
>> I think it would be better to split out building a tree from VF from both
>> arms and avoid using 'vf' when LOOP_VINFO_USING_SELECT_VL_P.
I am trying to split out building the tree from both arms, as you suggested.
Could you take a look at the following code?
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 8abc1937d74..24a86187d11 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -10315,19 +10315,47 @@ vectorizable_induction (loop_vec_info loop_vinfo,
/* iv_loop is the loop to be vectorized. Generate:
vec_step = [VF*S, VF*S, VF*S, VF*S] */
gimple_seq seq = NULL;
- if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
+ if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
{
- expr = build_int_cst (integer_type_node, vf);
- expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
+ /* When we're using loop_len produced by SELECT_VL, the non-final
+ iterations are not always processing VF elements. So vectorize
+ induction variable instead of
+
+ _21 = vect_vec_iv_.6_22 + { VF, ... };
+
+ We should generate:
+
+ _35 = .SELECT_VL (ivtmp_33, VF);
+ vect_cst__22 = [vec_duplicate_expr] _35;
+ _21 = vect_vec_iv_.6_22 + vect_cst__22; */
+ vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
+ tree len
+ = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
+ expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
+ unshare_expr (len)),
+ &seq, true, NULL_TREE);
}
else
- expr = build_int_cst (TREE_TYPE (step_expr), vf);
+ {
+ bool float_p = SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr));
+ expr = build_int_cst (float_p ? integer_type_node
+ : TREE_TYPE (step_expr),
+ vf);
+ if (float_p)
+ expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
+ }
+
new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr),
expr, step_expr);
if (seq)
{
- new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
- gcc_assert (!new_bb);
+ if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
+ gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
+ else
+ {
+ new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
+ gcc_assert (!new_bb);
+ }
}
}
@@ -10335,9 +10363,9 @@ vectorizable_induction (loop_vec_info loop_vinfo,
gcc_assert (CONSTANT_CLASS_P (new_name)
|| TREE_CODE (new_name) == SSA_NAME);
new_vec = build_vector_from_val (step_vectype, t);
- vec_step = vect_init_vector (loop_vinfo, stmt_info,
- new_vec, step_vectype, NULL);
-
+ vec_step
+ = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype,
+ LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? &si : NULL);
Thanks.
juzhe.zhong@rivai.ai
From: Richard Biener
Date: 2023-11-09 20:16
To: Juzhe-Zhong
CC: gcc-patches; richard.sandiford; rguenther; kito.cheng; kito.cheng
Subject: Re: [PATCH] Middle-end: Fix bug of induction variable vectorization for RVV
On Wed, Nov 8, 2023 at 11:53 AM Juzhe-Zhong <juzhe.zhong@rivai.ai> wrote:
>
> PR: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112438
>
> SELECT_VL result is not necessary always VF in non-final iteration.
>
> Current GIMPLE IR is wrong:
>
> # vect_vec_iv_.21_25 = PHI <_24(4), { 0, 1, 2, ... }(3)>
> ...
> _24 = vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... };
>
> After this patch which is correct for SELECT_VL:
>
> # vect_vec_iv_.8_22 = PHI <_21(4), { 0, 1, 2, ... }(3)>
> ...
> _35 = .SELECT_VL (ivtmp_33, POLY_INT_CST [4, 4]);
> _21 = vect_vec_iv_.8_22 + { POLY_INT_CST [4, 4], ... };
>
> kito, could you give more explanation ?
>
> PR middle/112438
>
> gcc/ChangeLog:
>
> * tree-vect-loop.cc (vectorizable_induction): Fix bug.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/autovec/pr112438.c: New test.
>
> ---
> .../gcc.target/riscv/rvv/autovec/pr112438.c | 35 +++++++++++++++++
> gcc/tree-vect-loop.cc | 39 +++++++++++++++----
> 2 files changed, 67 insertions(+), 7 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
>
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
> new file mode 100644
> index 00000000000..b326d56a52c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
> @@ -0,0 +1,35 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-vect-cost-model -ffast-math -fdump-tree-optimized-details" } */
> +
> +void
> +foo (int n, int *__restrict in, int *__restrict out)
> +{
> + for (int i = 0; i < n; i += 1)
> + {
> + out[i] = in[i] + i;
> + }
> +}
> +
> +void
> +foo2 (int n, float * __restrict in,
> +float * __restrict out)
> +{
> + for (int i = 0; i < n; i += 1)
> + {
> + out[i] = in[i] + i;
> + }
> +}
> +
> +void
> +foo3 (int n, float * __restrict in,
> +float * __restrict out, float x)
> +{
> + for (int i = 0; i < n; i += 1)
> + {
> + out[i] = in[i] + i* i;
> + }
> +}
> +
> +/* We don't want to see vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... }. */
> +/* { dg-final { scan-tree-dump-not "\\+ \{ POLY_INT_CST" "optimized" } } */
> +
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index a544bc9b059..3e103946168 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -10309,10 +10309,30 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> new_name = step_expr;
> else
> {
> + gimple_seq seq = NULL;
> + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> + {
> + /* When we're using loop_len produced by SELEC_VL, the non-final
> + iterations are not always processing VF elements. So vectorize
> + induction variable instead of
> +
> + _21 = vect_vec_iv_.6_22 + { VF, ... };
> +
> + We should generate:
> +
> + _35 = .SELECT_VL (ivtmp_33, VF);
> + vect_cst__22 = [vec_duplicate_expr] _35;
> + _21 = vect_vec_iv_.6_22 + vect_cst__22; */
> + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> + tree len
> + = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
> + expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
> + unshare_expr (len)),
> + &seq, true, NULL_TREE);
> + }
I think it would be better to split out building a tree from VF from both
arms and avoid using 'vf' when LOOP_VINFO_USING_SELECT_VL_P.
Btw, you are not patching the SLP path here which I believe has the same
problem but is currently exempt from non-constant VF at least.
Richard.
> /* iv_loop is the loop to be vectorized. Generate:
> vec_step = [VF*S, VF*S, VF*S, VF*S] */
> - gimple_seq seq = NULL;
> - if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> + else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> {
> expr = build_int_cst (integer_type_node, vf);
> expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
> @@ -10323,8 +10343,13 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> expr, step_expr);
> if (seq)
> {
> - new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> - gcc_assert (!new_bb);
> + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> + gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
> + else
> + {
> + new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> + gcc_assert (!new_bb);
> + }
> }
> }
>
> @@ -10332,9 +10357,9 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> gcc_assert (CONSTANT_CLASS_P (new_name)
> || TREE_CODE (new_name) == SSA_NAME);
> new_vec = build_vector_from_val (step_vectype, t);
> - vec_step = vect_init_vector (loop_vinfo, stmt_info,
> - new_vec, step_vectype, NULL);
> -
> + vec_step
> + = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype,
> + LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? &si : NULL);
>
> /* Create the following def-use cycle:
> loop prolog:
> --
> 2.36.3
>
On Thu, 9 Nov 2023, Juzhe-Zhong wrote:
> Hi, Richard.
>
> >> I think it would be better to split out building a tree from VF from both
> >> arms and avoid using 'vf' when LOOP_VINFO_USING_SELECT_VL_P.
>
> I am trying to split out building tree from both arms as you suggested..
> Could you take a look the following codes ?
>
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 8abc1937d74..24a86187d11 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -10315,19 +10315,47 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> /* iv_loop is the loop to be vectorized. Generate:
> vec_step = [VF*S, VF*S, VF*S, VF*S] */
> gimple_seq seq = NULL;
> - if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> {
> - expr = build_int_cst (integer_type_node, vf);
> - expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
> + /* When we're using loop_len produced by SELEC_VL, the non-final
> + iterations are not always processing VF elements. So vectorize
> + induction variable instead of
> +
> + _21 = vect_vec_iv_.6_22 + { VF, ... };
> +
> + We should generate:
> +
> + _35 = .SELECT_VL (ivtmp_33, VF);
> + vect_cst__22 = [vec_duplicate_expr] _35;
> + _21 = vect_vec_iv_.6_22 + vect_cst__22; */
> + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> + tree len
> + = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
> + expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
> + unshare_expr (len)),
> + &seq, true, NULL_TREE);
> }
> else
> - expr = build_int_cst (TREE_TYPE (step_expr), vf);
> + {
> + bool float_p = SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr));
> + expr = build_int_cst (float_p ? integer_type_node
> + : TREE_TYPE (step_expr),
> + vf);
> + if (float_p)
> + expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
> + }
> +
I meant that you should keep the existing flow in the function. Specifically,
I think you should handle SCALAR_FLOAT_TYPE_P the way it was previously
handled, and just build 'vf' dynamically.
> new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr),
> expr, step_expr);
> if (seq)
> {
> - new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> - gcc_assert (!new_bb);
> + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> + gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
> + else
> + {
> + new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> + gcc_assert (!new_bb);
> + }
> }
> }
>
> @@ -10335,9 +10363,9 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> gcc_assert (CONSTANT_CLASS_P (new_name)
> || TREE_CODE (new_name) == SSA_NAME);
> new_vec = build_vector_from_val (step_vectype, t);
> - vec_step = vect_init_vector (loop_vinfo, stmt_info,
> - new_vec, step_vectype, NULL);
> -
> + vec_step
> + = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype,
> + LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? &si : NULL);
Again, this makes the flow hard to follow. I suppose refactoring this
overall to
if (nested_in_vect_loop)
...
else if (LOOP_VINFO_USING_SELECT_VL_P (..))
...
else
...
and duplicating this tail into each of the cases would make it easier to follow.
Do we never have LOOP_VINFO_USING_SELECT_VL_P for nested_in_vect_loop?
Richard.
> Thanks.
>
>
> juzhe.zhong@rivai.ai
>
> From: Richard Biener
> Date: 2023-11-09 20:16
> To: Juzhe-Zhong
> CC: gcc-patches; richard.sandiford; rguenther; kito.cheng; kito.cheng
> Subject: Re: [PATCH] Middle-end: Fix bug of induction variable vectorization for RVV
> On Wed, Nov 8, 2023 at 11:53 AM Juzhe-Zhong <juzhe.zhong@rivai.ai> wrote:
> >
> > PR: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112438
> >
> > SELECT_VL result is not necessary always VF in non-final iteration.
> >
> > Current GIMPLE IR is wrong:
> >
> > # vect_vec_iv_.21_25 = PHI <_24(4), { 0, 1, 2, ... }(3)>
> > ...
> > _24 = vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... };
> >
> > After this patch which is correct for SELECT_VL:
> >
> > # vect_vec_iv_.8_22 = PHI <_21(4), { 0, 1, 2, ... }(3)>
> > ...
> > _35 = .SELECT_VL (ivtmp_33, POLY_INT_CST [4, 4]);
> > _21 = vect_vec_iv_.8_22 + { POLY_INT_CST [4, 4], ... };
> >
> > kito, could you give more explanation ?
> >
> > PR middle/112438
> >
> > gcc/ChangeLog:
> >
> > * tree-vect-loop.cc (vectorizable_induction): Fix bug.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/riscv/rvv/autovec/pr112438.c: New test.
> >
> > ---
> > .../gcc.target/riscv/rvv/autovec/pr112438.c | 35 +++++++++++++++++
> > gcc/tree-vect-loop.cc | 39 +++++++++++++++----
> > 2 files changed, 67 insertions(+), 7 deletions(-)
> > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
> >
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
> > new file mode 100644
> > index 00000000000..b326d56a52c
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
> > @@ -0,0 +1,35 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-vect-cost-model -ffast-math -fdump-tree-optimized-details" } */
> > +
> > +void
> > +foo (int n, int *__restrict in, int *__restrict out)
> > +{
> > + for (int i = 0; i < n; i += 1)
> > + {
> > + out[i] = in[i] + i;
> > + }
> > +}
> > +
> > +void
> > +foo2 (int n, float * __restrict in,
> > +float * __restrict out)
> > +{
> > + for (int i = 0; i < n; i += 1)
> > + {
> > + out[i] = in[i] + i;
> > + }
> > +}
> > +
> > +void
> > +foo3 (int n, float * __restrict in,
> > +float * __restrict out, float x)
> > +{
> > + for (int i = 0; i < n; i += 1)
> > + {
> > + out[i] = in[i] + i* i;
> > + }
> > +}
> > +
> > +/* We don't want to see vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... }. */
> > +/* { dg-final { scan-tree-dump-not "\\+ \{ POLY_INT_CST" "optimized" } } */
> > +
> > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> > index a544bc9b059..3e103946168 100644
> > --- a/gcc/tree-vect-loop.cc
> > +++ b/gcc/tree-vect-loop.cc
> > @@ -10309,10 +10309,30 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > new_name = step_expr;
> > else
> > {
> > + gimple_seq seq = NULL;
> > + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> > + {
> > + /* When we're using loop_len produced by SELEC_VL, the non-final
> > + iterations are not always processing VF elements. So vectorize
> > + induction variable instead of
> > +
> > + _21 = vect_vec_iv_.6_22 + { VF, ... };
> > +
> > + We should generate:
> > +
> > + _35 = .SELECT_VL (ivtmp_33, VF);
> > + vect_cst__22 = [vec_duplicate_expr] _35;
> > + _21 = vect_vec_iv_.6_22 + vect_cst__22; */
> > + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> > + tree len
> > + = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
> > + expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
> > + unshare_expr (len)),
> > + &seq, true, NULL_TREE);
> > + }
>
> I think it would be better to split out building a tree from VF from both
> arms and avoid using 'vf' when LOOP_VINFO_USING_SELECT_VL_P.
>
> Btw, you are not patching the SLP path here which I believe has the same
> problem but is currently exempt from non-constant VF at least.
>
> Richard.
>
> > /* iv_loop is the loop to be vectorized. Generate:
> > vec_step = [VF*S, VF*S, VF*S, VF*S] */
> > - gimple_seq seq = NULL;
> > - if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> > + else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> > {
> > expr = build_int_cst (integer_type_node, vf);
> > expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
> > @@ -10323,8 +10343,13 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > expr, step_expr);
> > if (seq)
> > {
> > - new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> > - gcc_assert (!new_bb);
> > + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> > + gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
> > + else
> > + {
> > + new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> > + gcc_assert (!new_bb);
> > + }
> > }
> > }
> >
> > @@ -10332,9 +10357,9 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > gcc_assert (CONSTANT_CLASS_P (new_name)
> > || TREE_CODE (new_name) == SSA_NAME);
> > new_vec = build_vector_from_val (step_vectype, t);
> > - vec_step = vect_init_vector (loop_vinfo, stmt_info,
> > - new_vec, step_vectype, NULL);
> > -
> > + vec_step
> > + = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype,
> > + LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? &si : NULL);
> >
> > /* Create the following def-use cycle:
> > loop prolog:
> > --
> > 2.36.3
> >
>
>
Hi, Richard.
>> For nested_in_vect_loop we never have LOOP_VINFO_USING_SELECT_VL_P?
Could you give me an example of a nested loop?
So far I have not been able to produce such a case.
Thanks a lot for the comments, I will try to refactor as you suggested.
juzhe.zhong@rivai.ai
From: Richard Biener
Date: 2023-11-10 17:46
To: 钟居哲
CC: richard.guenther; gcc-patches; richard.sandiford; kito.cheng; kito.cheng
Subject: Re: Re: [PATCH] Middle-end: Fix bug of induction variable vectorization for RVV
On Thu, 9 Nov 2023, 钟居哲 wrote:
> Hi, Richard.
>
> >> I think it would be better to split out building a tree from VF from both
> >> arms and avoid using 'vf' when LOOP_VINFO_USING_SELECT_VL_P.
>
> I am trying to split out building tree from both arms as you suggested..
> Could you take a look the following codes ?
>
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 8abc1937d74..24a86187d11 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -10315,19 +10315,47 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> /* iv_loop is the loop to be vectorized. Generate:
> vec_step = [VF*S, VF*S, VF*S, VF*S] */
> gimple_seq seq = NULL;
> - if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> {
> - expr = build_int_cst (integer_type_node, vf);
> - expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
> + /* When we're using loop_len produced by SELEC_VL, the non-final
> + iterations are not always processing VF elements. So vectorize
> + induction variable instead of
> +
> + _21 = vect_vec_iv_.6_22 + { VF, ... };
> +
> + We should generate:
> +
> + _35 = .SELECT_VL (ivtmp_33, VF);
> + vect_cst__22 = [vec_duplicate_expr] _35;
> + _21 = vect_vec_iv_.6_22 + vect_cst__22; */
> + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> + tree len
> + = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
> + expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
> + unshare_expr (len)),
> + &seq, true, NULL_TREE);
> }
> else
> - expr = build_int_cst (TREE_TYPE (step_expr), vf);
> + {
> + bool float_p = SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr));
> + expr = build_int_cst (float_p ? integer_type_node
> + : TREE_TYPE (step_expr),
> + vf);
> + if (float_p)
> + expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
> + }
> +
I meant you keep the existing flow in the function, specifically
I think you should handle SCALAR_FLOAT_TYPE_P like it was previously
handled, just build 'vf' in the dynamic way.
> new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr),
> expr, step_expr);
> if (seq)
> {
> - new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> - gcc_assert (!new_bb);
> + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> + gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
> + else
> + {
> + new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> + gcc_assert (!new_bb);
> + }
> }
> }
>
> @@ -10335,9 +10363,9 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> gcc_assert (CONSTANT_CLASS_P (new_name)
> || TREE_CODE (new_name) == SSA_NAME);
> new_vec = build_vector_from_val (step_vectype, t);
> - vec_step = vect_init_vector (loop_vinfo, stmt_info,
> - new_vec, step_vectype, NULL);
> -
> + vec_step
> + = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype,
> + LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? &si : NULL);
again this makes the flow hard to follow. I suppose refactoring this
overall to
if (nested_in_vect_loop)
...
else if (LOOP_VINFO_USING_SELECT_VL_P (..))
...
else
...
and duplicate this tail into the cases makes it easier to follow.
For nested_in_vect_loop we never have LOOP_VINFO_USING_SELECT_VL_P?
Richard.
> Thanks.
>
>
> juzhe.zhong@rivai.ai
>
> From: Richard Biener
> Date: 2023-11-09 20:16
> To: Juzhe-Zhong
> CC: gcc-patches; richard.sandiford; rguenther; kito.cheng; kito.cheng
> Subject: Re: [PATCH] Middle-end: Fix bug of induction variable vectorization for RVV
> On Wed, Nov 8, 2023 at 11:53 AM Juzhe-Zhong <juzhe.zhong@rivai.ai> wrote:
> >
> > PR: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112438
> >
> > SELECT_VL result is not necessary always VF in non-final iteration.
> >
> > Current GIMPLE IR is wrong:
> >
> > # vect_vec_iv_.21_25 = PHI <_24(4), { 0, 1, 2, ... }(3)>
> > ...
> > _24 = vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... };
> >
> > After this patch which is correct for SELECT_VL:
> >
> > # vect_vec_iv_.8_22 = PHI <_21(4), { 0, 1, 2, ... }(3)>
> > ...
> > _35 = .SELECT_VL (ivtmp_33, POLY_INT_CST [4, 4]);
> > _21 = vect_vec_iv_.8_22 + { POLY_INT_CST [4, 4], ... };
> >
> > kito, could you give more explanation ?
> >
> > PR middle/112438
> >
> > gcc/ChangeLog:
> >
> > * tree-vect-loop.cc (vectorizable_induction): Fix bug.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/riscv/rvv/autovec/pr112438.c: New test.
> >
> > ---
> > .../gcc.target/riscv/rvv/autovec/pr112438.c | 35 +++++++++++++++++
> > gcc/tree-vect-loop.cc | 39 +++++++++++++++----
> > 2 files changed, 67 insertions(+), 7 deletions(-)
> > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
> >
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
> > new file mode 100644
> > index 00000000000..b326d56a52c
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
> > @@ -0,0 +1,35 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-vect-cost-model -ffast-math -fdump-tree-optimized-details" } */
> > +
> > +void
> > +foo (int n, int *__restrict in, int *__restrict out)
> > +{
> > + for (int i = 0; i < n; i += 1)
> > + {
> > + out[i] = in[i] + i;
> > + }
> > +}
> > +
> > +void
> > +foo2 (int n, float * __restrict in,
> > +float * __restrict out)
> > +{
> > + for (int i = 0; i < n; i += 1)
> > + {
> > + out[i] = in[i] + i;
> > + }
> > +}
> > +
> > +void
> > +foo3 (int n, float * __restrict in,
> > +float * __restrict out, float x)
> > +{
> > + for (int i = 0; i < n; i += 1)
> > + {
> > + out[i] = in[i] + i* i;
> > + }
> > +}
> > +
> > +/* We don't want to see vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... }. */
> > +/* { dg-final { scan-tree-dump-not "\\+ \{ POLY_INT_CST" "optimized" } } */
> > +
> > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> > index a544bc9b059..3e103946168 100644
> > --- a/gcc/tree-vect-loop.cc
> > +++ b/gcc/tree-vect-loop.cc
> > @@ -10309,10 +10309,30 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > new_name = step_expr;
> > else
> > {
> > + gimple_seq seq = NULL;
> > + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> > + {
> > + /* When we're using loop_len produced by SELEC_VL, the non-final
> > + iterations are not always processing VF elements. So vectorize
> > + induction variable instead of
> > +
> > + _21 = vect_vec_iv_.6_22 + { VF, ... };
> > +
> > + We should generate:
> > +
> > + _35 = .SELECT_VL (ivtmp_33, VF);
> > + vect_cst__22 = [vec_duplicate_expr] _35;
> > + _21 = vect_vec_iv_.6_22 + vect_cst__22; */
> > + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> > + tree len
> > + = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
> > + expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
> > + unshare_expr (len)),
> > + &seq, true, NULL_TREE);
> > + }
>
> I think it would be better to split out building a tree from VF from both
> arms and avoid using 'vf' when LOOP_VINFO_USING_SELECT_VL_P.
>
> Btw, you are not patching the SLP path here which I believe has the same
> problem but is currently exempt from non-constant VF at least.
>
> Richard.
>
> > /* iv_loop is the loop to be vectorized. Generate:
> > vec_step = [VF*S, VF*S, VF*S, VF*S] */
> > - gimple_seq seq = NULL;
> > - if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> > + else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> > {
> > expr = build_int_cst (integer_type_node, vf);
> > expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
> > @@ -10323,8 +10343,13 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > expr, step_expr);
> > if (seq)
> > {
> > - new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> > - gcc_assert (!new_bb);
> > + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> > + gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
> > + else
> > + {
> > + new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> > + gcc_assert (!new_bb);
> > + }
> > }
> > }
> >
> > @@ -10332,9 +10357,9 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > gcc_assert (CONSTANT_CLASS_P (new_name)
> > || TREE_CODE (new_name) == SSA_NAME);
> > new_vec = build_vector_from_val (step_vectype, t);
> > - vec_step = vect_init_vector (loop_vinfo, stmt_info,
> > - new_vec, step_vectype, NULL);
> > -
> > + vec_step
> > + = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype,
> > + LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? &si : NULL);
> >
> > /* Create the following def-use cycle:
> > loop prolog:
> > --
> > 2.36.3
> >
>
>
--
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
Hi, Richard.
I am sorry for bothering you. I am trying to understand what you mean.
Is the following code what you want?
/* Create the vector that holds the step of the induction. */
if (nested_in_vect_loop)
{
/* iv_loop is nested in the loop to be vectorized. Generate:
vec_step = [S, S, S, S] */
new_name = step_expr;
/* We expect LOOP_VINFO_USING_SELECT_VL_P to be false in nested loop. */
gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
t = unshare_expr (new_name);
gcc_assert (CONSTANT_CLASS_P (new_name)
|| TREE_CODE (new_name) == SSA_NAME);
new_vec = build_vector_from_val (step_vectype, t);
vec_step
= vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype, NULL);
}
else if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
{
/* When we're using loop_len produced by SELECT_VL, the non-final
iterations are not always processing VF elements. So vectorize
induction variable instead of
_21 = vect_vec_iv_.6_22 + { VF, ... };
We should generate:
_35 = .SELECT_VL (ivtmp_33, VF);
vect_cst__22 = [vec_duplicate_expr] _35;
_21 = vect_vec_iv_.6_22 + vect_cst__22; */
vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
unshare_expr (len)),
&seq, true, NULL_TREE);
gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
t = unshare_expr (new_name);
gcc_assert (CONSTANT_CLASS_P (new_name)
|| TREE_CODE (new_name) == SSA_NAME);
new_vec = build_vector_from_val (step_vectype, t);
vec_step
= vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype, &si);
}
else
{
/* iv_loop is the loop to be vectorized. Generate:
vec_step = [VF*S, VF*S, VF*S, VF*S] */
gimple_seq seq = NULL;
if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
{
expr = build_int_cst (integer_type_node, vf);
expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
}
else
expr = build_int_cst (TREE_TYPE (step_expr), vf);
new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr),
expr, step_expr);
if (seq)
{
new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
gcc_assert (!new_bb);
}
t = unshare_expr (new_name);
gcc_assert (CONSTANT_CLASS_P (new_name)
|| TREE_CODE (new_name) == SSA_NAME);
new_vec = build_vector_from_val (step_vectype, t);
vec_step
= vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype, NULL);
}
It seems that the following code:
t = unshare_expr (new_name);
gcc_assert (CONSTANT_CLASS_P (new_name)
|| TREE_CODE (new_name) == SSA_NAME);
new_vec = build_vector_from_val (step_vectype, t);
vec_step
= vect_init_vector
appears three times. I am not sure whether this is what you want?
Thanks.
juzhe.zhong@rivai.ai
From: Richard Biener
Date: 2023-11-10 17:46
To: 钟居哲
CC: richard.guenther; gcc-patches; richard.sandiford; kito.cheng; kito.cheng
Subject: Re: Re: [PATCH] Middle-end: Fix bug of induction variable vectorization for RVV
On Thu, 9 Nov 2023, 钟居哲 wrote:
> Hi, Richard.
>
> >> I think it would be better to split out building a tree from VF from both
> >> arms and avoid using 'vf' when LOOP_VINFO_USING_SELECT_VL_P.
>
> I am trying to split out building tree from both arms as you suggested..
> Could you take a look the following codes ?
>
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 8abc1937d74..24a86187d11 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -10315,19 +10315,47 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> /* iv_loop is the loop to be vectorized. Generate:
> vec_step = [VF*S, VF*S, VF*S, VF*S] */
> gimple_seq seq = NULL;
> - if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> {
> - expr = build_int_cst (integer_type_node, vf);
> - expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
> + /* When we're using loop_len produced by SELEC_VL, the non-final
> + iterations are not always processing VF elements. So vectorize
> + induction variable instead of
> +
> + _21 = vect_vec_iv_.6_22 + { VF, ... };
> +
> + We should generate:
> +
> + _35 = .SELECT_VL (ivtmp_33, VF);
> + vect_cst__22 = [vec_duplicate_expr] _35;
> + _21 = vect_vec_iv_.6_22 + vect_cst__22; */
> + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> + tree len
> + = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
> + expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
> + unshare_expr (len)),
> + &seq, true, NULL_TREE);
> }
> else
> - expr = build_int_cst (TREE_TYPE (step_expr), vf);
> + {
> + bool float_p = SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr));
> + expr = build_int_cst (float_p ? integer_type_node
> + : TREE_TYPE (step_expr),
> + vf);
> + if (float_p)
> + expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
> + }
> +
I meant you keep the existing flow in the function, specifically
I think you should handle SCALAR_FLOAT_TYPE_P like it was previously
handled, just build 'vf' in the dynamic way.
> new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr),
> expr, step_expr);
> if (seq)
> {
> - new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> - gcc_assert (!new_bb);
> + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> + gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
> + else
> + {
> + new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> + gcc_assert (!new_bb);
> + }
> }
> }
>
> @@ -10335,9 +10363,9 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> gcc_assert (CONSTANT_CLASS_P (new_name)
> || TREE_CODE (new_name) == SSA_NAME);
> new_vec = build_vector_from_val (step_vectype, t);
> - vec_step = vect_init_vector (loop_vinfo, stmt_info,
> - new_vec, step_vectype, NULL);
> -
> + vec_step
> + = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype,
> + LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? &si : NULL);
again this makes the flow hard to follow. I suppose refactoring this
overall to
if (nested_in_vect_loop)
...
else if (LOOP_VINFO_USING_SELECT_VL_P (..))
...
else
...
and duplicate this tail into the cases makes it easier to follow.
For nested_in_vect_loop we never have LOOP_VINFO_USING_SELECT_VL_P?
Richard.
> Thanks.
>
>
> juzhe.zhong@rivai.ai
>
> From: Richard Biener
> Date: 2023-11-09 20:16
> To: Juzhe-Zhong
> CC: gcc-patches; richard.sandiford; rguenther; kito.cheng; kito.cheng
> Subject: Re: [PATCH] Middle-end: Fix bug of induction variable vectorization for RVV
> On Wed, Nov 8, 2023 at 11:53 AM Juzhe-Zhong <juzhe.zhong@rivai.ai> wrote:
> >
> > PR: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112438
> >
> > SELECT_VL result is not necessary always VF in non-final iteration.
> >
> > Current GIMPLE IR is wrong:
> >
> > # vect_vec_iv_.21_25 = PHI <_24(4), { 0, 1, 2, ... }(3)>
> > ...
> > _24 = vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... };
> >
> > After this patch which is correct for SELECT_VL:
> >
> > # vect_vec_iv_.8_22 = PHI <_21(4), { 0, 1, 2, ... }(3)>
> > ...
> > _35 = .SELECT_VL (ivtmp_33, POLY_INT_CST [4, 4]);
> > _21 = vect_vec_iv_.8_22 + { POLY_INT_CST [4, 4], ... };
> >
> > kito, could you give more explanation ?
> >
> > PR middle/112438
> >
> > gcc/ChangeLog:
> >
> > * tree-vect-loop.cc (vectorizable_induction): Fix bug.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/riscv/rvv/autovec/pr112438.c: New test.
> >
> > ---
> > .../gcc.target/riscv/rvv/autovec/pr112438.c | 35 +++++++++++++++++
> > gcc/tree-vect-loop.cc | 39 +++++++++++++++----
> > 2 files changed, 67 insertions(+), 7 deletions(-)
> > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
> >
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
> > new file mode 100644
> > index 00000000000..b326d56a52c
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
> > @@ -0,0 +1,35 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-vect-cost-model -ffast-math -fdump-tree-optimized-details" } */
> > +
> > +void
> > +foo (int n, int *__restrict in, int *__restrict out)
> > +{
> > + for (int i = 0; i < n; i += 1)
> > + {
> > + out[i] = in[i] + i;
> > + }
> > +}
> > +
> > +void
> > +foo2 (int n, float * __restrict in,
> > +float * __restrict out)
> > +{
> > + for (int i = 0; i < n; i += 1)
> > + {
> > + out[i] = in[i] + i;
> > + }
> > +}
> > +
> > +void
> > +foo3 (int n, float * __restrict in,
> > +float * __restrict out, float x)
> > +{
> > + for (int i = 0; i < n; i += 1)
> > + {
> > + out[i] = in[i] + i* i;
> > + }
> > +}
> > +
> > +/* We don't want to see vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... }. */
> > +/* { dg-final { scan-tree-dump-not "\\+ \{ POLY_INT_CST" "optimized" } } */
> > +
> > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> > index a544bc9b059..3e103946168 100644
> > --- a/gcc/tree-vect-loop.cc
> > +++ b/gcc/tree-vect-loop.cc
> > @@ -10309,10 +10309,30 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > new_name = step_expr;
> > else
> > {
> > + gimple_seq seq = NULL;
> > + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> > + {
> > + /* When we're using loop_len produced by SELEC_VL, the non-final
> > + iterations are not always processing VF elements. So vectorize
> > + induction variable instead of
> > +
> > + _21 = vect_vec_iv_.6_22 + { VF, ... };
> > +
> > + We should generate:
> > +
> > + _35 = .SELECT_VL (ivtmp_33, VF);
> > + vect_cst__22 = [vec_duplicate_expr] _35;
> > + _21 = vect_vec_iv_.6_22 + vect_cst__22; */
> > + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> > + tree len
> > + = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
> > + expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
> > + unshare_expr (len)),
> > + &seq, true, NULL_TREE);
> > + }
>
> I think it would be better to split out building a tree from VF from both
> arms and avoid using 'vf' when LOOP_VINFO_USING_SELECT_VL_P.
>
> Btw, you are not patching the SLP path here which I believe has the same
> problem but is currently exempt from non-constant VF at least.
>
> Richard.
>
> > /* iv_loop is the loop to be vectorized. Generate:
> > vec_step = [VF*S, VF*S, VF*S, VF*S] */
> > - gimple_seq seq = NULL;
> > - if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> > + else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> > {
> > expr = build_int_cst (integer_type_node, vf);
> > expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
> > @@ -10323,8 +10343,13 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > expr, step_expr);
> > if (seq)
> > {
> > - new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> > - gcc_assert (!new_bb);
> > + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> > + gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
> > + else
> > + {
> > + new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> > + gcc_assert (!new_bb);
> > + }
> > }
> > }
> >
> > @@ -10332,9 +10357,9 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > gcc_assert (CONSTANT_CLASS_P (new_name)
> > || TREE_CODE (new_name) == SSA_NAME);
> > new_vec = build_vector_from_val (step_vectype, t);
> > - vec_step = vect_init_vector (loop_vinfo, stmt_info,
> > - new_vec, step_vectype, NULL);
> > -
> > + vec_step
> > + = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype,
> > + LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? &si : NULL);
> >
> > /* Create the following def-use cycle:
> > loop prolog:
> > --
> > 2.36.3
> >
>
>
--
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
On Fri, 10 Nov 2023, juzhe.zhong@rivai.ai wrote:
> Hi, Richard.
>
> I am sorry for bothering you. I am trying to understand what you mean.
>
> Is this following codes that you want ?
>
> /* Create the vector that holds the step of the induction. */
> if (nested_in_vect_loop)
> {
> /* iv_loop is nested in the loop to be vectorized. Generate:
> vec_step = [S, S, S, S] */
> new_name = step_expr;
> /* We expect LOOP_VINFO_USING_SELECT_VL_P to be false in nested loop. */
> gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
> t = unshare_expr (new_name);
> gcc_assert (CONSTANT_CLASS_P (new_name)
> || TREE_CODE (new_name) == SSA_NAME);
> new_vec = build_vector_from_val (step_vectype, t);
> vec_step
> = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype, NULL);
> }
> else if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> {
> /* When we're using loop_len produced by SELEC_VL, the non-final
> iterations are not always processing VF elements. So vectorize
> induction variable instead of
>
> _21 = vect_vec_iv_.6_22 + { VF, ... };
>
> We should generate:
>
> _35 = .SELECT_VL (ivtmp_33, VF);
> vect_cst__22 = [vec_duplicate_expr] _35;
> _21 = vect_vec_iv_.6_22 + vect_cst__22; */
> vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
> expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
> unshare_expr (len)),
> &seq, true, NULL_TREE);
> gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
> t = unshare_expr (new_name);
> gcc_assert (CONSTANT_CLASS_P (new_name)
> || TREE_CODE (new_name) == SSA_NAME);
> new_vec = build_vector_from_val (step_vectype, t);
> vec_step
> = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype, &si);
> }
> else
> {
> /* iv_loop is the loop to be vectorized. Generate:
> vec_step = [VF*S, VF*S, VF*S, VF*S] */
> gimple_seq seq = NULL;
> if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> {
> expr = build_int_cst (integer_type_node, vf);
> expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
> }
> else
> expr = build_int_cst (TREE_TYPE (step_expr), vf);
> new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr),
> expr, step_expr);
> if (seq)
> {
> new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> gcc_assert (!new_bb);
> }
> t = unshare_expr (new_name);
> gcc_assert (CONSTANT_CLASS_P (new_name)
> || TREE_CODE (new_name) == SSA_NAME);
> new_vec = build_vector_from_val (step_vectype, t);
> vec_step
> = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype, NULL);
> }
>
> It seems that this following codes:
>
> t = unshare_expr (new_name);
> gcc_assert (CONSTANT_CLASS_P (new_name)
> || TREE_CODE (new_name) == SSA_NAME);
> new_vec = build_vector_from_val (step_vectype, t);
> vec_step
> = vect_init_vector
>
> appears 3 times. I am not sure whether it is the way you want?
I'd avoid that particular bit by having
gimple_stmt_iterator *si = NULL;
before the if () and set that accordingly only in the
LOOP_VINFO_USING_SELECT_VL_P path. But otherwise yes.
Richard.
>
> Thanks.
>
>
>
> juzhe.zhong@rivai.ai
>
> From: Richard Biener
> Date: 2023-11-10 17:46
> To: 钟居哲
> CC: richard.guenther; gcc-patches; richard.sandiford; kito.cheng; kito.cheng
> Subject: Re: Re: [PATCH] Middle-end: Fix bug of induction variable vectorization for RVV
> On Thu, 9 Nov 2023, 钟居哲 wrote:
>
> > Hi, Richard.
> >
> > >> I think it would be better to split out building a tree from VF from both
> > >> arms and avoid using 'vf' when LOOP_VINFO_USING_SELECT_VL_P.
> >
> > I am trying to split out building tree from both arms as you suggested..
> > Could you take a look the following codes ?
> >
> > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> > index 8abc1937d74..24a86187d11 100644
> > --- a/gcc/tree-vect-loop.cc
> > +++ b/gcc/tree-vect-loop.cc
> > @@ -10315,19 +10315,47 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > /* iv_loop is the loop to be vectorized. Generate:
> > vec_step = [VF*S, VF*S, VF*S, VF*S] */
> > gimple_seq seq = NULL;
> > - if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> > + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> > {
> > - expr = build_int_cst (integer_type_node, vf);
> > - expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
> > + /* When we're using loop_len produced by SELEC_VL, the non-final
> > + iterations are not always processing VF elements. So vectorize
> > + induction variable instead of
> > +
> > + _21 = vect_vec_iv_.6_22 + { VF, ... };
> > +
> > + We should generate:
> > +
> > + _35 = .SELECT_VL (ivtmp_33, VF);
> > + vect_cst__22 = [vec_duplicate_expr] _35;
> > + _21 = vect_vec_iv_.6_22 + vect_cst__22; */
> > + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> > + tree len
> > + = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
> > + expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
> > + unshare_expr (len)),
> > + &seq, true, NULL_TREE);
> > }
> > else
> > - expr = build_int_cst (TREE_TYPE (step_expr), vf);
> > + {
> > + bool float_p = SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr));
> > + expr = build_int_cst (float_p ? integer_type_node
> > + : TREE_TYPE (step_expr),
> > + vf);
> > + if (float_p)
> > + expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
> > + }
> > +
>
> I meant you keep the existing flow in the function, specifically
> I think you should handle SCALAR_FLOAT_TYPE_P like it was previously
> handled, just build 'vf' in the dynamic way.
>
> > new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr),
> > expr, step_expr);
> > if (seq)
> > {
> > - new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> > - gcc_assert (!new_bb);
> > + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> > + gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
> > + else
> > + {
> > + new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> > + gcc_assert (!new_bb);
> > + }
> > }
> > }
> >
> > @@ -10335,9 +10363,9 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > gcc_assert (CONSTANT_CLASS_P (new_name)
> > || TREE_CODE (new_name) == SSA_NAME);
> > new_vec = build_vector_from_val (step_vectype, t);
> > - vec_step = vect_init_vector (loop_vinfo, stmt_info,
> > - new_vec, step_vectype, NULL);
> > -
> > + vec_step
> > + = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype,
> > + LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? &si : NULL);
>
> again this makes the flow hard to follow. I suppose refactoring this
> overall to
>
> if (nested_in_vect_loop)
> ...
> else if (LOOP_VINFO_USING_SELECT_VL_P (..))
> ...
> else
> ...
>
> and duplicate this tail into the cases makes it easier to follow.
>
> For nested_in_vect_loop we never have LOOP_VINFO_USING_SELECT_VL_P?
>
> Richard.
>
>
> > Thanks.
> >
> >
> > juzhe.zhong@rivai.ai
> >
> > From: Richard Biener
> > Date: 2023-11-09 20:16
> > To: Juzhe-Zhong
> > CC: gcc-patches; richard.sandiford; rguenther; kito.cheng; kito.cheng
> > Subject: Re: [PATCH] Middle-end: Fix bug of induction variable vectorization for RVV
> > > On Wed, Nov 8, 2023 at 11:53 AM Juzhe-Zhong <juzhe.zhong@rivai.ai> wrote:
> > >
> > > PR: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112438
> > >
> > > > The SELECT_VL result is not necessarily VF in non-final iterations.
> > >
> > > Current GIMPLE IR is wrong:
> > >
> > > # vect_vec_iv_.21_25 = PHI <_24(4), { 0, 1, 2, ... }(3)>
> > > ...
> > > _24 = vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... };
> > >
> > > After this patch which is correct for SELECT_VL:
> > >
> > > # vect_vec_iv_.8_22 = PHI <_21(4), { 0, 1, 2, ... }(3)>
> > > ...
> > > _35 = .SELECT_VL (ivtmp_33, POLY_INT_CST [4, 4]);
> > > _21 = vect_vec_iv_.8_22 + { POLY_INT_CST [4, 4], ... };
> > >
> > > kito, could you give more explanation ?
> > >
> > > PR middle/112438
> > >
> > > gcc/ChangeLog:
> > >
> > > * tree-vect-loop.cc (vectorizable_induction): Fix bug.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > * gcc.target/riscv/rvv/autovec/pr112438.c: New test.
> > >
> > > ---
> > > .../gcc.target/riscv/rvv/autovec/pr112438.c | 35 +++++++++++++++++
> > > gcc/tree-vect-loop.cc | 39 +++++++++++++++----
> > > 2 files changed, 67 insertions(+), 7 deletions(-)
> > > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
> > >
> > > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
> > > new file mode 100644
> > > index 00000000000..b326d56a52c
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
> > > @@ -0,0 +1,35 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-vect-cost-model -ffast-math -fdump-tree-optimized-details" } */
> > > +
> > > +void
> > > +foo (int n, int *__restrict in, int *__restrict out)
> > > +{
> > > + for (int i = 0; i < n; i += 1)
> > > + {
> > > + out[i] = in[i] + i;
> > > + }
> > > +}
> > > +
> > > +void
> > > +foo2 (int n, float * __restrict in,
> > > +float * __restrict out)
> > > +{
> > > + for (int i = 0; i < n; i += 1)
> > > + {
> > > + out[i] = in[i] + i;
> > > + }
> > > +}
> > > +
> > > +void
> > > +foo3 (int n, float * __restrict in,
> > > +float * __restrict out, float x)
> > > +{
> > > + for (int i = 0; i < n; i += 1)
> > > + {
> > > + out[i] = in[i] + i* i;
> > > + }
> > > +}
> > > +
> > > +/* We don't want to see vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... }. */
> > > +/* { dg-final { scan-tree-dump-not "\\+ \{ POLY_INT_CST" "optimized" } } */
> > > +
> > > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> > > index a544bc9b059..3e103946168 100644
> > > --- a/gcc/tree-vect-loop.cc
> > > +++ b/gcc/tree-vect-loop.cc
> > > @@ -10309,10 +10309,30 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > > new_name = step_expr;
> > > else
> > > {
> > > + gimple_seq seq = NULL;
> > > + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> > > + {
> > > + /* When we're using loop_len produced by SELEC_VL, the non-final
> > > + iterations are not always processing VF elements. So vectorize
> > > + induction variable instead of
> > > +
> > > + _21 = vect_vec_iv_.6_22 + { VF, ... };
> > > +
> > > + We should generate:
> > > +
> > > + _35 = .SELECT_VL (ivtmp_33, VF);
> > > + vect_cst__22 = [vec_duplicate_expr] _35;
> > > + _21 = vect_vec_iv_.6_22 + vect_cst__22; */
> > > + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> > > + tree len
> > > + = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
> > > + expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
> > > + unshare_expr (len)),
> > > + &seq, true, NULL_TREE);
> > > + }
> >
> > I think it would be better to split out building a tree from VF from both
> > arms and avoid using 'vf' when LOOP_VINFO_USING_SELECT_VL_P.
> >
> > Btw, you are not patching the SLP path here which I believe has the same
> > problem but is currently exempt from non-constant VF at least.
> >
> > Richard.
> >
> > > /* iv_loop is the loop to be vectorized. Generate:
> > > vec_step = [VF*S, VF*S, VF*S, VF*S] */
> > > - gimple_seq seq = NULL;
> > > - if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> > > + else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> > > {
> > > expr = build_int_cst (integer_type_node, vf);
> > > expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
> > > @@ -10323,8 +10343,13 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > > expr, step_expr);
> > > if (seq)
> > > {
> > > - new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> > > - gcc_assert (!new_bb);
> > > + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> > > + gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
> > > + else
> > > + {
> > > + new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> > > + gcc_assert (!new_bb);
> > > + }
> > > }
> > > }
> > >
> > > @@ -10332,9 +10357,9 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > > gcc_assert (CONSTANT_CLASS_P (new_name)
> > > || TREE_CODE (new_name) == SSA_NAME);
> > > new_vec = build_vector_from_val (step_vectype, t);
> > > - vec_step = vect_init_vector (loop_vinfo, stmt_info,
> > > - new_vec, step_vectype, NULL);
> > > -
> > > + vec_step
> > > + = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype,
> > > + LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? &si : NULL);
> > >
> > > /* Create the following def-use cycle:
> > > loop prolog:
> > > --
> > > 2.36.3
> > >
> >
> >
>
>
Thanks a lot. I think I finally understand what you mean now :).
Could you confirm the following code?
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 8abc1937d74..5615b16bdcd 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -10306,10 +10306,39 @@ vectorizable_induction (loop_vec_info loop_vinfo,
/* Create the vector that holds the step of the induction. */
+ gimple_stmt_iterator *step_iv_si = NULL;
if (nested_in_vect_loop)
- /* iv_loop is nested in the loop to be vectorized. Generate:
- vec_step = [S, S, S, S] */
- new_name = step_expr;
+ {
+ /* iv_loop is nested in the loop to be vectorized. Generate:
+ vec_step = [S, S, S, S] */
+ new_name = step_expr;
+ /* We expect LOOP_VINFO_USING_SELECT_VL_P to be false in nested loop. */
+ gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
+ }
+ else if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
+ {
+ /* When we're using loop_len produced by SELECT_VL, the non-final
+ iterations are not always processing VF elements. So vectorize
+ induction variable instead of
+
+ _21 = vect_vec_iv_.6_22 + { VF, ... };
+
+ We should generate:
+
+ _35 = .SELECT_VL (ivtmp_33, VF);
+ vect_cst__22 = [vec_duplicate_expr] _35;
+ _21 = vect_vec_iv_.6_22 + vect_cst__22; */
+ gimple_seq seq = NULL;
+ vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
+ tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
+ expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
+ unshare_expr (len)),
+ &seq, true, NULL_TREE);
+ new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr), expr,
+ step_expr);
+ gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
+ step_iv_si = &si;
+ }
else
{
/* iv_loop is the loop to be vectorized. Generate:
@@ -10336,7 +10365,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
|| TREE_CODE (new_name) == SSA_NAME);
new_vec = build_vector_from_val (step_vectype, t);
vec_step = vect_init_vector (loop_vinfo, stmt_info,
- new_vec, step_vectype, NULL);
+ new_vec, step_vectype, step_iv_si);
/* Create the following def-use cycle:
@@ -10382,6 +10411,8 @@ vectorizable_induction (loop_vec_info loop_vinfo,
gimple_seq seq = NULL;
/* FORNOW. This restriction should be relaxed. */
gcc_assert (!nested_in_vect_loop);
+ /* We expect LOOP_VINFO_USING_SELECT_VL_P to be false if ncopies > 1. */
+ gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
If this is OK with you, I am going to test it with an x86 bootstrap + regtest.
Thanks.
juzhe.zhong@rivai.ai
From: Richard Biener
Date: 2023-11-10 18:19
To: juzhe.zhong@rivai.ai
CC: Richard Biener; gcc-patches; richard.sandiford; kito.cheng; Kito.cheng
Subject: Re: Re: [PATCH] Middle-end: Fix bug of induction variable vectorization for RVV
On Fri, 10 Nov 2023, juzhe.zhong@rivai.ai wrote:
> Hi, Richard.
>
> I am sorry for bothering you. I am trying to understand what you mean.
>
> Is the following code what you want?
>
> /* Create the vector that holds the step of the induction. */
> if (nested_in_vect_loop)
> {
> /* iv_loop is nested in the loop to be vectorized. Generate:
> vec_step = [S, S, S, S] */
> new_name = step_expr;
> /* We expect LOOP_VINFO_USING_SELECT_VL_P to be false in nested loop. */
> gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
> t = unshare_expr (new_name);
> gcc_assert (CONSTANT_CLASS_P (new_name)
> || TREE_CODE (new_name) == SSA_NAME);
> new_vec = build_vector_from_val (step_vectype, t);
> vec_step
> = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype, NULL);
> }
> else if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> {
> /* When we're using loop_len produced by SELEC_VL, the non-final
> iterations are not always processing VF elements. So vectorize
> induction variable instead of
>
> _21 = vect_vec_iv_.6_22 + { VF, ... };
>
> We should generate:
>
> _35 = .SELECT_VL (ivtmp_33, VF);
> vect_cst__22 = [vec_duplicate_expr] _35;
> _21 = vect_vec_iv_.6_22 + vect_cst__22; */
> vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
> expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
> unshare_expr (len)),
> &seq, true, NULL_TREE);
> gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
> t = unshare_expr (new_name);
> gcc_assert (CONSTANT_CLASS_P (new_name)
> || TREE_CODE (new_name) == SSA_NAME);
> new_vec = build_vector_from_val (step_vectype, t);
> vec_step
> = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype, &si);
> }
> else
> {
> /* iv_loop is the loop to be vectorized. Generate:
> vec_step = [VF*S, VF*S, VF*S, VF*S] */
> gimple_seq seq = NULL;
> if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> {
> expr = build_int_cst (integer_type_node, vf);
> expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
> }
> else
> expr = build_int_cst (TREE_TYPE (step_expr), vf);
> new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr),
> expr, step_expr);
> if (seq)
> {
> new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> gcc_assert (!new_bb);
> }
> t = unshare_expr (new_name);
> gcc_assert (CONSTANT_CLASS_P (new_name)
> || TREE_CODE (new_name) == SSA_NAME);
> new_vec = build_vector_from_val (step_vectype, t);
> vec_step
> = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype, NULL);
> }
>
> It seems that the following code:
>
> t = unshare_expr (new_name);
> gcc_assert (CONSTANT_CLASS_P (new_name)
> || TREE_CODE (new_name) == SSA_NAME);
> new_vec = build_vector_from_val (step_vectype, t);
> vec_step
> = vect_init_vector
>
> appears 3 times. I am not sure whether it is the way you want?
I'd avoid that particular bit by having
gimple_stmt_iterator *si = NULL;
before the if () and set that accordingly only in the
LOOP_VINFO_USING_SELECT_VL_P path. But otherwise yes.
Richard.
>
> Thanks.
>
>
>
> juzhe.zhong@rivai.ai
>
> From: Richard Biener
> Date: 2023-11-10 17:46
> To: juzhe.zhong@rivai.ai
> CC: richard.guenther; gcc-patches; richard.sandiford; kito.cheng; kito.cheng
> Subject: Re: Re: [PATCH] Middle-end: Fix bug of induction variable vectorization for RVV
> On Thu, 9 Nov 2023, juzhe.zhong@rivai.ai wrote:
>
> > Hi, Richard.
> >
> > >> I think it would be better to split out building a tree from VF from both
> > >> arms and avoid using 'vf' when LOOP_VINFO_USING_SELECT_VL_P.
> >
> > I am trying to split out building the tree from both arms as you suggested.
> > Could you take a look at the following code?
> >
> > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> > index 8abc1937d74..24a86187d11 100644
> > --- a/gcc/tree-vect-loop.cc
> > +++ b/gcc/tree-vect-loop.cc
> > @@ -10315,19 +10315,47 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > /* iv_loop is the loop to be vectorized. Generate:
> > vec_step = [VF*S, VF*S, VF*S, VF*S] */
> > gimple_seq seq = NULL;
> > - if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> > + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> > {
> > - expr = build_int_cst (integer_type_node, vf);
> > - expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
> > + /* When we're using loop_len produced by SELEC_VL, the non-final
> > + iterations are not always processing VF elements. So vectorize
> > + induction variable instead of
> > +
> > + _21 = vect_vec_iv_.6_22 + { VF, ... };
> > +
> > + We should generate:
> > +
> > + _35 = .SELECT_VL (ivtmp_33, VF);
> > + vect_cst__22 = [vec_duplicate_expr] _35;
> > + _21 = vect_vec_iv_.6_22 + vect_cst__22; */
> > + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> > + tree len
> > + = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
> > + expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
> > + unshare_expr (len)),
> > + &seq, true, NULL_TREE);
> > }
> > else
> > - expr = build_int_cst (TREE_TYPE (step_expr), vf);
> > + {
> > + bool float_p = SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr));
> > + expr = build_int_cst (float_p ? integer_type_node
> > + : TREE_TYPE (step_expr),
> > + vf);
> > + if (float_p)
> > + expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
> > + }
> > +
>
> I meant you keep the existing flow in the function, specifically
> I think you should handle SCALAR_FLOAT_TYPE_P like it was previously
> handled, just build 'vf' in the dynamic way.
>
> > new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr),
> > expr, step_expr);
> > if (seq)
> > {
> > - new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> > - gcc_assert (!new_bb);
> > + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> > + gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
> > + else
> > + {
> > + new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> > + gcc_assert (!new_bb);
> > + }
> > }
> > }
> >
> > @@ -10335,9 +10363,9 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > gcc_assert (CONSTANT_CLASS_P (new_name)
> > || TREE_CODE (new_name) == SSA_NAME);
> > new_vec = build_vector_from_val (step_vectype, t);
> > - vec_step = vect_init_vector (loop_vinfo, stmt_info,
> > - new_vec, step_vectype, NULL);
> > -
> > + vec_step
> > + = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype,
> > + LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? &si : NULL);
>
> again this makes the flow hard to follow. I suppose refactoring this
> overall to
>
> if (nested_in_vect_loop)
> ...
> else if (LOOP_VINFO_USING_SELECT_VL_P (..))
> ...
> else
> ...
>
> and duplicate this tail into the cases makes it easier to follow.
>
> For nested_in_vect_loop we never have LOOP_VINFO_USING_SELECT_VL_P?
>
> Richard.
>
>
> > Thanks.
> >
> >
> > juzhe.zhong@rivai.ai
> >
> > From: Richard Biener
> > Date: 2023-11-09 20:16
> > To: Juzhe-Zhong
> > CC: gcc-patches; richard.sandiford; rguenther; kito.cheng; kito.cheng
> > Subject: Re: [PATCH] Middle-end: Fix bug of induction variable vectorization for RVV
> > > On Wed, Nov 8, 2023 at 11:53 AM Juzhe-Zhong <juzhe.zhong@rivai.ai> wrote:
> > >
> > > PR: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112438
> > >
> > > > The SELECT_VL result is not necessarily VF in non-final iterations.
> > >
> > > Current GIMPLE IR is wrong:
> > >
> > > # vect_vec_iv_.21_25 = PHI <_24(4), { 0, 1, 2, ... }(3)>
> > > ...
> > > _24 = vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... };
> > >
> > > After this patch which is correct for SELECT_VL:
> > >
> > > # vect_vec_iv_.8_22 = PHI <_21(4), { 0, 1, 2, ... }(3)>
> > > ...
> > > _35 = .SELECT_VL (ivtmp_33, POLY_INT_CST [4, 4]);
> > > _21 = vect_vec_iv_.8_22 + { POLY_INT_CST [4, 4], ... };
> > >
> > > kito, could you give more explanation ?
> > >
> > > PR middle/112438
> > >
> > > gcc/ChangeLog:
> > >
> > > * tree-vect-loop.cc (vectorizable_induction): Fix bug.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > * gcc.target/riscv/rvv/autovec/pr112438.c: New test.
> > >
> > > ---
> > > .../gcc.target/riscv/rvv/autovec/pr112438.c | 35 +++++++++++++++++
> > > gcc/tree-vect-loop.cc | 39 +++++++++++++++----
> > > 2 files changed, 67 insertions(+), 7 deletions(-)
> > > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
> > >
> > > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
> > > new file mode 100644
> > > index 00000000000..b326d56a52c
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
> > > @@ -0,0 +1,35 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-vect-cost-model -ffast-math -fdump-tree-optimized-details" } */
> > > +
> > > +void
> > > +foo (int n, int *__restrict in, int *__restrict out)
> > > +{
> > > + for (int i = 0; i < n; i += 1)
> > > + {
> > > + out[i] = in[i] + i;
> > > + }
> > > +}
> > > +
> > > +void
> > > +foo2 (int n, float * __restrict in,
> > > +float * __restrict out)
> > > +{
> > > + for (int i = 0; i < n; i += 1)
> > > + {
> > > + out[i] = in[i] + i;
> > > + }
> > > +}
> > > +
> > > +void
> > > +foo3 (int n, float * __restrict in,
> > > +float * __restrict out, float x)
> > > +{
> > > + for (int i = 0; i < n; i += 1)
> > > + {
> > > + out[i] = in[i] + i* i;
> > > + }
> > > +}
> > > +
> > > +/* We don't want to see vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... }. */
> > > +/* { dg-final { scan-tree-dump-not "\\+ \{ POLY_INT_CST" "optimized" } } */
> > > +
> > > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> > > index a544bc9b059..3e103946168 100644
> > > --- a/gcc/tree-vect-loop.cc
> > > +++ b/gcc/tree-vect-loop.cc
> > > @@ -10309,10 +10309,30 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > > new_name = step_expr;
> > > else
> > > {
> > > + gimple_seq seq = NULL;
> > > + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> > > + {
> > > + /* When we're using loop_len produced by SELEC_VL, the non-final
> > > + iterations are not always processing VF elements. So vectorize
> > > + induction variable instead of
> > > +
> > > + _21 = vect_vec_iv_.6_22 + { VF, ... };
> > > +
> > > + We should generate:
> > > +
> > > + _35 = .SELECT_VL (ivtmp_33, VF);
> > > + vect_cst__22 = [vec_duplicate_expr] _35;
> > > + _21 = vect_vec_iv_.6_22 + vect_cst__22; */
> > > + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> > > + tree len
> > > + = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
> > > + expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
> > > + unshare_expr (len)),
> > > + &seq, true, NULL_TREE);
> > > + }
> >
> > I think it would be better to split out building a tree from VF from both
> > arms and avoid using 'vf' when LOOP_VINFO_USING_SELECT_VL_P.
> >
> > Btw, you are not patching the SLP path here which I believe has the same
> > problem but is currently exempt from non-constant VF at least.
> >
> > Richard.
> >
> > > /* iv_loop is the loop to be vectorized. Generate:
> > > vec_step = [VF*S, VF*S, VF*S, VF*S] */
> > > - gimple_seq seq = NULL;
> > > - if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> > > + else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> > > {
> > > expr = build_int_cst (integer_type_node, vf);
> > > expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
> > > @@ -10323,8 +10343,13 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > > expr, step_expr);
> > > if (seq)
> > > {
> > > - new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> > > - gcc_assert (!new_bb);
> > > + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> > > + gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
> > > + else
> > > + {
> > > + new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> > > + gcc_assert (!new_bb);
> > > + }
> > > }
> > > }
> > >
> > > @@ -10332,9 +10357,9 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > > gcc_assert (CONSTANT_CLASS_P (new_name)
> > > || TREE_CODE (new_name) == SSA_NAME);
> > > new_vec = build_vector_from_val (step_vectype, t);
> > > - vec_step = vect_init_vector (loop_vinfo, stmt_info,
> > > - new_vec, step_vectype, NULL);
> > > -
> > > + vec_step
> > > + = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype,
> > > + LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? &si : NULL);
> > >
> > > /* Create the following def-use cycle:
> > > loop prolog:
> > > --
> > > 2.36.3
> > >
> >
> >
>
>
--
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
On Fri, 10 Nov 2023, juzhe.zhong@rivai.ai wrote:
> Thanks a lot. I think I finally understand what you mean now :).
>
> Could you confirm the following code?
>
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 8abc1937d74..5615b16bdcd 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -10306,10 +10306,39 @@ vectorizable_induction (loop_vec_info loop_vinfo,
>
>
> /* Create the vector that holds the step of the induction. */
> + gimple_stmt_iterator *step_iv_si = NULL;
> if (nested_in_vect_loop)
> - /* iv_loop is nested in the loop to be vectorized. Generate:
> - vec_step = [S, S, S, S] */
> - new_name = step_expr;
> + {
> + /* iv_loop is nested in the loop to be vectorized. Generate:
> + vec_step = [S, S, S, S] */
> + new_name = step_expr;
> + /* We expect LOOP_VINFO_USING_SELECT_VL_P to be false in nested loop. */
> + gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
> + }
> + else if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> + {
> + /* When we're using loop_len produced by SELEC_VL, the non-final
> + iterations are not always processing VF elements. So vectorize
> + induction variable instead of
> +
> + _21 = vect_vec_iv_.6_22 + { VF, ... };
> +
> + We should generate:
> +
> + _35 = .SELECT_VL (ivtmp_33, VF);
> + vect_cst__22 = [vec_duplicate_expr] _35;
> + _21 = vect_vec_iv_.6_22 + vect_cst__22; */
> + gimple_seq seq = NULL;
> + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> + tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
> + expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
> + unshare_expr (len)),
> + &seq, true, NULL_TREE);
> + new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr), expr,
> + step_expr);
> + gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
> + step_iv_si = &si;
> + }
> else
> {
> /* iv_loop is the loop to be vectorized. Generate:
> @@ -10336,7 +10365,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> || TREE_CODE (new_name) == SSA_NAME);
> new_vec = build_vector_from_val (step_vectype, t);
> vec_step = vect_init_vector (loop_vinfo, stmt_info,
> - new_vec, step_vectype, NULL);
> + new_vec, step_vectype, step_iv_si);
>
>
> /* Create the following def-use cycle:
> @@ -10382,6 +10411,8 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> gimple_seq seq = NULL;
> /* FORNOW. This restriction should be relaxed. */
> gcc_assert (!nested_in_vect_loop);
> + /* We expect LOOP_VINFO_USING_SELECT_VL_P to be false if ncopies > 1. */
> + gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
>
>
> If this is OK with you, I am going to test it with an x86 bootstrap + regtest.
Yep.
> Thanks.
>
>
>
> juzhe.zhong@rivai.ai
>
> From: Richard Biener
> Date: 2023-11-10 18:19
> To: juzhe.zhong@rivai.ai
> CC: Richard Biener; gcc-patches; richard.sandiford; kito.cheng; Kito.cheng
> Subject: Re: Re: [PATCH] Middle-end: Fix bug of induction variable vectorization for RVV
> On Fri, 10 Nov 2023, juzhe.zhong@rivai.ai wrote:
>
> > Hi, Richard.
> >
> > I am sorry for bothering you. I am trying to understand what you mean.
> >
> > Is this following codes that you want ?
> >
> > /* Create the vector that holds the step of the induction. */
> > if (nested_in_vect_loop)
> > {
> > /* iv_loop is nested in the loop to be vectorized. Generate:
> > vec_step = [S, S, S, S] */
> > new_name = step_expr;
> > /* We expect LOOP_VINFO_USING_SELECT_VL_P to be false in nested loop. */
> > gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
> > t = unshare_expr (new_name);
> > gcc_assert (CONSTANT_CLASS_P (new_name)
> > || TREE_CODE (new_name) == SSA_NAME);
> > new_vec = build_vector_from_val (step_vectype, t);
> > vec_step
> > = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype, NULL);
> > }
> > else if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> > {
> > /* When we're using loop_len produced by SELEC_VL, the non-final
> > iterations are not always processing VF elements. So vectorize
> > induction variable instead of
> >
> > _21 = vect_vec_iv_.6_22 + { VF, ... };
> >
> > We should generate:
> >
> > _35 = .SELECT_VL (ivtmp_33, VF);
> > vect_cst__22 = [vec_duplicate_expr] _35;
> > _21 = vect_vec_iv_.6_22 + vect_cst__22; */
> > vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> > tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
> > expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
> > unshare_expr (len)),
> > &seq, true, NULL_TREE);
> > gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
> > t = unshare_expr (new_name);
> > gcc_assert (CONSTANT_CLASS_P (new_name)
> > || TREE_CODE (new_name) == SSA_NAME);
> > new_vec = build_vector_from_val (step_vectype, t);
> > vec_step
> > = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype, &si);
> > }
> > else
> > {
> > /* iv_loop is the loop to be vectorized. Generate:
> > vec_step = [VF*S, VF*S, VF*S, VF*S] */
> > gimple_seq seq = NULL;
> > if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> > {
> > expr = build_int_cst (integer_type_node, vf);
> > expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
> > }
> > else
> > expr = build_int_cst (TREE_TYPE (step_expr), vf);
> > new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr),
> > expr, step_expr);
> > if (seq)
> > {
> > new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> > gcc_assert (!new_bb);
> > }
> > t = unshare_expr (new_name);
> > gcc_assert (CONSTANT_CLASS_P (new_name)
> > || TREE_CODE (new_name) == SSA_NAME);
> > new_vec = build_vector_from_val (step_vectype, t);
> > vec_step
> > = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype, NULL);
> > }
> >
> > It seems that this following codes:
> >
> > t = unshare_expr (new_name);
> > gcc_assert (CONSTANT_CLASS_P (new_name)
> > || TREE_CODE (new_name) == SSA_NAME);
> > new_vec = build_vector_from_val (step_vectype, t);
> > vec_step
> > = vect_init_vector
> >
> > appears 3 times. I am not sure whether it is the way you want?
>
> I'd avoid that particular bit by having
>
> gimple_stmt_iterator *si = NULL;
>
> before the if () and set that accordingly only in the
> LOOP_VINFO_USING_SELECT_VL_P path. But otherwise yes.
>
> Richard.
>
> >
> > Thanks.
> >
> >
> >
> > juzhe.zhong@rivai.ai
> >
> > From: Richard Biener
> > Date: 2023-11-10 17:46
> > To: ???
> > CC: richard.guenther; gcc-patches; richard.sandiford; kito.cheng; kito.cheng
> > Subject: Re: Re: [PATCH] Middle-end: Fix bug of induction variable vectorization for RVV
> > On Thu, 9 Nov 2023, ??? wrote:
> >
> > > Hi, Richard.
> > >
> > > >> I think it would be better to split out building a tree from VF from both
> > > >> arms and avoid using 'vf' when LOOP_VINFO_USING_SELECT_VL_P.
> > >
> > > I am trying to split out building tree from both arms as you suggested..
> > > Could you take a look the following codes ?
> > >
> > > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> > > index 8abc1937d74..24a86187d11 100644
> > > --- a/gcc/tree-vect-loop.cc
> > > +++ b/gcc/tree-vect-loop.cc
> > > @@ -10315,19 +10315,47 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > > /* iv_loop is the loop to be vectorized. Generate:
> > > vec_step = [VF*S, VF*S, VF*S, VF*S] */
> > > gimple_seq seq = NULL;
> > > - if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> > > + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> > > {
> > > - expr = build_int_cst (integer_type_node, vf);
> > > - expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
> > > + /* When we're using loop_len produced by SELEC_VL, the non-final
> > > + iterations are not always processing VF elements. So vectorize
> > > + induction variable instead of
> > > +
> > > + _21 = vect_vec_iv_.6_22 + { VF, ... };
> > > +
> > > + We should generate:
> > > +
> > > + _35 = .SELECT_VL (ivtmp_33, VF);
> > > + vect_cst__22 = [vec_duplicate_expr] _35;
> > > + _21 = vect_vec_iv_.6_22 + vect_cst__22; */
> > > + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> > > + tree len
> > > + = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
> > > + expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
> > > + unshare_expr (len)),
> > > + &seq, true, NULL_TREE);
> > > }
> > > else
> > > - expr = build_int_cst (TREE_TYPE (step_expr), vf);
> > > + {
> > > + bool float_p = SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr));
> > > + expr = build_int_cst (float_p ? integer_type_node
> > > + : TREE_TYPE (step_expr),
> > > + vf);
> > > + if (float_p)
> > > + expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
> > > + }
> > > +
> >
> > I meant you keep the existing flow in the function, specifically
> > I think you should handle SCALAR_FLOAT_TYPE_P like it was previously
> > handled, just build 'vf' in the dynamic way.
> >
> > > new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr),
> > > expr, step_expr);
> > > if (seq)
> > > {
> > > - new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> > > - gcc_assert (!new_bb);
> > > + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> > > + gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
> > > + else
> > > + {
> > > + new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> > > + gcc_assert (!new_bb);
> > > + }
> > > }
> > > }
> > >
> > > @@ -10335,9 +10363,9 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > > gcc_assert (CONSTANT_CLASS_P (new_name)
> > > || TREE_CODE (new_name) == SSA_NAME);
> > > new_vec = build_vector_from_val (step_vectype, t);
> > > - vec_step = vect_init_vector (loop_vinfo, stmt_info,
> > > - new_vec, step_vectype, NULL);
> > > -
> > > + vec_step
> > > + = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype,
> > > + LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? &si : NULL);
> >
> > again this makes the flow hard to follow. I suppose refactoring this
> > overall to
> >
> > if (nested_in_vect_loop)
> > ...
> > else if (LOOP_VINFO_USING_SELECT_VL_P (..))
> > ...
> > else
> > ...
> >
> > and duplicate this tail into the cases makes it easier to follow.
> >
> > For nested_in_vect_loop we never have LOOP_VINFO_USING_SELECT_VL_P?
> >
> > Richard.
> >
> >
> > > Thanks.
> > >
> > >
> > > juzhe.zhong@rivai.ai
> > >
> > > From: Richard Biener
> > > Date: 2023-11-09 20:16
> > > To: Juzhe-Zhong
> > > CC: gcc-patches; richard.sandiford; rguenther; kito.cheng; kito.cheng
> > > Subject: Re: [PATCH] Middle-end: Fix bug of induction variable vectorization for RVV
> > > > On Wed, Nov 8, 2023 at 11:53 AM Juzhe-Zhong <juzhe.zhong@rivai.ai> wrote:
> > > >
> > > > PR: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112438
> > > >
> > > > > The SELECT_VL result is not necessarily VF in non-final iterations.
> > > >
> > > > Current GIMPLE IR is wrong:
> > > >
> > > > # vect_vec_iv_.21_25 = PHI <_24(4), { 0, 1, 2, ... }(3)>
> > > > ...
> > > > _24 = vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... };
> > > >
> > > > After this patch which is correct for SELECT_VL:
> > > >
> > > > # vect_vec_iv_.8_22 = PHI <_21(4), { 0, 1, 2, ... }(3)>
> > > > ...
> > > > _35 = .SELECT_VL (ivtmp_33, POLY_INT_CST [4, 4]);
> > > > _21 = vect_vec_iv_.8_22 + { POLY_INT_CST [4, 4], ... };
> > > >
> > > > kito, could you give more explanation ?
> > > >
> > > > PR middle/112438
> > > >
> > > > gcc/ChangeLog:
> > > >
> > > > * tree-vect-loop.cc (vectorizable_induction): Fix bug.
> > > >
> > > > gcc/testsuite/ChangeLog:
> > > >
> > > > * gcc.target/riscv/rvv/autovec/pr112438.c: New test.
> > > >
> > > > ---
> > > > .../gcc.target/riscv/rvv/autovec/pr112438.c | 35 +++++++++++++++++
> > > > gcc/tree-vect-loop.cc | 39 +++++++++++++++----
> > > > 2 files changed, 67 insertions(+), 7 deletions(-)
> > > > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
> > > >
> > > > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
> > > > new file mode 100644
> > > > index 00000000000..b326d56a52c
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
> > > > @@ -0,0 +1,35 @@
> > > > +/* { dg-do compile } */
> > > > +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-vect-cost-model -ffast-math -fdump-tree-optimized-details" } */
> > > > +
> > > > +void
> > > > +foo (int n, int *__restrict in, int *__restrict out)
> > > > +{
> > > > + for (int i = 0; i < n; i += 1)
> > > > + {
> > > > + out[i] = in[i] + i;
> > > > + }
> > > > +}
> > > > +
> > > > +void
> > > > +foo2 (int n, float * __restrict in,
> > > > +float * __restrict out)
> > > > +{
> > > > + for (int i = 0; i < n; i += 1)
> > > > + {
> > > > + out[i] = in[i] + i;
> > > > + }
> > > > +}
> > > > +
> > > > +void
> > > > +foo3 (int n, float * __restrict in,
> > > > +float * __restrict out, float x)
> > > > +{
> > > > + for (int i = 0; i < n; i += 1)
> > > > + {
> > > > + out[i] = in[i] + i* i;
> > > > + }
> > > > +}
> > > > +
> > > > +/* We don't want to see vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... }. */
> > > > +/* { dg-final { scan-tree-dump-not "\\+ \{ POLY_INT_CST" "optimized" } } */
> > > > +
> > > > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> > > > index a544bc9b059..3e103946168 100644
> > > > --- a/gcc/tree-vect-loop.cc
> > > > +++ b/gcc/tree-vect-loop.cc
> > > > @@ -10309,10 +10309,30 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > > > new_name = step_expr;
> > > > else
> > > > {
> > > > + gimple_seq seq = NULL;
> > > > + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> > > > + {
> > > > > + /* When we're using loop_len produced by SELECT_VL, the non-final
> > > > + iterations are not always processing VF elements. So vectorize
> > > > + induction variable instead of
> > > > +
> > > > + _21 = vect_vec_iv_.6_22 + { VF, ... };
> > > > +
> > > > + We should generate:
> > > > +
> > > > + _35 = .SELECT_VL (ivtmp_33, VF);
> > > > + vect_cst__22 = [vec_duplicate_expr] _35;
> > > > + _21 = vect_vec_iv_.6_22 + vect_cst__22; */
> > > > + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> > > > + tree len
> > > > + = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
> > > > + expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
> > > > + unshare_expr (len)),
> > > > + &seq, true, NULL_TREE);
> > > > + }
> > >
> > > I think it would be better to split out building a tree from VF from both
> > > arms and avoid using 'vf' when LOOP_VINFO_USING_SELECT_VL_P.
> > >
> > > Btw, you are not patching the SLP path here which I believe has the same
> > > problem but is currently exempt from non-constant VF at least.
> > >
> > > Richard.
> > >
> > > > /* iv_loop is the loop to be vectorized. Generate:
> > > > vec_step = [VF*S, VF*S, VF*S, VF*S] */
> > > > - gimple_seq seq = NULL;
> > > > - if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> > > > + else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> > > > {
> > > > expr = build_int_cst (integer_type_node, vf);
> > > > expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
> > > > @@ -10323,8 +10343,13 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > > > expr, step_expr);
> > > > if (seq)
> > > > {
> > > > - new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> > > > - gcc_assert (!new_bb);
> > > > + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> > > > + gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
> > > > + else
> > > > + {
> > > > + new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> > > > + gcc_assert (!new_bb);
> > > > + }
> > > > }
> > > > }
> > > >
> > > > @@ -10332,9 +10357,9 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > > > gcc_assert (CONSTANT_CLASS_P (new_name)
> > > > || TREE_CODE (new_name) == SSA_NAME);
> > > > new_vec = build_vector_from_val (step_vectype, t);
> > > > - vec_step = vect_init_vector (loop_vinfo, stmt_info,
> > > > - new_vec, step_vectype, NULL);
> > > > -
> > > > + vec_step
> > > > + = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype,
> > > > + LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? &si : NULL);
> > > >
> > > > /* Create the following def-use cycle:
> > > > loop prolog:
> > > > --
> > > > 2.36.3
> > > >
> > >
> > >
> >
> >
>
>
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-vect-cost-model -ffast-math -fdump-tree-optimized-details" } */
+
+void
+foo (int n, int *__restrict in, int *__restrict out)
+{
+ for (int i = 0; i < n; i += 1)
+ {
+ out[i] = in[i] + i;
+ }
+}
+
+void
+foo2 (int n, float * __restrict in,
+float * __restrict out)
+{
+ for (int i = 0; i < n; i += 1)
+ {
+ out[i] = in[i] + i;
+ }
+}
+
+void
+foo3 (int n, float * __restrict in,
+float * __restrict out, float x)
+{
+ for (int i = 0; i < n; i += 1)
+ {
+ out[i] = in[i] + i* i;
+ }
+}
+
+/* We don't want to see vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... }. */
+/* { dg-final { scan-tree-dump-not "\\+ \{ POLY_INT_CST" "optimized" } } */
+
@@ -10309,10 +10309,30 @@ vectorizable_induction (loop_vec_info loop_vinfo,
new_name = step_expr;
else
{
+ gimple_seq seq = NULL;
+ if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
+ {
+ /* When we're using loop_len produced by SELECT_VL, the non-final
+ iterations are not always processing VF elements. So vectorize
+ induction variable instead of
+
+ _21 = vect_vec_iv_.6_22 + { VF, ... };
+
+ We should generate:
+
+ _35 = .SELECT_VL (ivtmp_33, VF);
+ vect_cst__22 = [vec_duplicate_expr] _35;
+ _21 = vect_vec_iv_.6_22 + vect_cst__22; */
+ vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
+ tree len
+ = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
+ expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
+ unshare_expr (len)),
+ &seq, true, NULL_TREE);
+ }
/* iv_loop is the loop to be vectorized. Generate:
vec_step = [VF*S, VF*S, VF*S, VF*S] */
- gimple_seq seq = NULL;
- if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
+ else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
{
expr = build_int_cst (integer_type_node, vf);
expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
@@ -10323,8 +10343,13 @@ vectorizable_induction (loop_vec_info loop_vinfo,
expr, step_expr);
if (seq)
{
- new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
- gcc_assert (!new_bb);
+ if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
+ gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
+ else
+ {
+ new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
+ gcc_assert (!new_bb);
+ }
}
}
@@ -10332,9 +10357,9 @@ vectorizable_induction (loop_vec_info loop_vinfo,
gcc_assert (CONSTANT_CLASS_P (new_name)
|| TREE_CODE (new_name) == SSA_NAME);
new_vec = build_vector_from_val (step_vectype, t);
- vec_step = vect_init_vector (loop_vinfo, stmt_info,
- new_vec, step_vectype, NULL);
-
+ vec_step
+ = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype,
+ LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? &si : NULL);
/* Create the following def-use cycle:
loop prolog: