VECT: Fix ICE of variable stride on strided load/store with SELECT_VL loop control.
Checks
Commit Message
From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
Hi, Richi.
Sorry for making a mistake with LEN_MASK_GATHER_LOAD/LEN_MASK_SCATTER_STORE
under SELECT_VL loop control.
Consider the following case:
#define TEST_LOOP(DATA_TYPE, BITS) \
void __attribute__ ((noinline, noclone)) \
f_##DATA_TYPE##_##BITS (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
INDEX##BITS stride, INDEX##BITS n) \
{ \
for (INDEX##BITS i = 0; i < n; ++i) \
dest[i] += src[i * stride]; \
}
When "stride" is a constant, the current flow works fine.
However, when "stride" is a variable, it causes an ICE:
# vectp_src.67_85 = PHI <vectp_src.67_86(6), src_21(D)(12)>
...
_96 = .SELECT_VL (ivtmp_94, 4);
...
ivtmp_78 = ((sizetype) _39 * (sizetype) _96) * 4;
vect__11.69_87 = .LEN_MASK_GATHER_LOAD (vectp_src.67_85, _84, 4, { 0, 0, 0, 0 }, { -1, -1, -1, -1 }, _96, 0);
...
vectp_src.67_86 = vectp_src.67_85 + ivtmp_78;
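This is because of the IR: ivtmp_78 = ((sizetype) _39 * (sizetype) _96) * 4; whose right-hand side is a nested expression that has not been gimplified into single operations.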
Instead, I split the IR into:
step_stride = _39
step = step_stride * 4
ivtmp_78 = step * _96
I don't think this patch's code is elegant enough; could you help me refine it?
Thanks.
gcc/ChangeLog:
* tree-vect-stmts.cc (vect_get_strided_load_store_ops): Fix ICE.
---
gcc/tree-vect-stmts.cc | 38 +++++++++++++++++++++++++++++++++-----
1 file changed, 33 insertions(+), 5 deletions(-)
Comments
On Thu, 6 Jul 2023, juzhe.zhong@rivai.ai wrote:
> From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
>
> Hi, Richi.
>
> Sorry for making mistake on LEN_MASK_GATHER_LOAD/LEN_MASK_SCATTER_STORE
> with SELECT_VL loop control.
>
> Consider this following case:
> #define TEST_LOOP(DATA_TYPE, BITS) \
> void __attribute__ ((noinline, noclone)) \
> f_##DATA_TYPE##_##BITS (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
> INDEX##BITS stride, INDEX##BITS n) \
> { \
> for (INDEX##BITS i = 0; i < n; ++i) \
> dest[i] += src[i * stride]; \
> }
>
> When "stride" is a constant, current flow works fine.
> However, when "stride" is a variable. It causes an ICE:
> # vectp_src.67_85 = PHI <vectp_src.67_86(6), src_21(D)(12)>
> ...
> _96 = .SELECT_VL (ivtmp_94, 4);
> ...
> ivtmp_78 = ((sizetype) _39 * (sizetype) _96) * 4;
> vect__11.69_87 = .LEN_MASK_GATHER_LOAD (vectp_src.67_85, _84, 4, { 0, 0, 0, 0 }, { -1, -1, -1, -1 }, _96, 0);
> ...
> vectp_src.67_86 = vectp_src.67_85 + ivtmp_78;
>
> Becase the IR: ivtmp_78 = ((sizetype) _39 * (sizetype) _96) * 4;
>
> Instead, I split the IR into:
>
> step_stride = _39
> step = step_stride * 4
> ivtmp_78 = step * _96
>
> I don't think this patch's code is elegant enough, could you help me refine these codes?
>
> Thanks.
>
> gcc/ChangeLog:
>
> * tree-vect-stmts.cc (vect_get_strided_load_store_ops): Fix ICE.
>
> ---
> gcc/tree-vect-stmts.cc | 38 +++++++++++++++++++++++++++++++++-----
> 1 file changed, 33 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index c10a4be60eb..12d1b0f1ac0 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -3172,12 +3172,40 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
> vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
> tree loop_len
> = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
> - tree tmp
> - = fold_build2 (MULT_EXPR, sizetype,
> - fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
> - loop_len);
> + tree tmp;
> + gassign *assign;
> +
> + if (TREE_CODE (DR_STEP (dr)) == INTEGER_CST)
> + tmp = fold_build2 (MULT_EXPR, sizetype,
> + fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
> + loop_len);
> + else
> + {
> + /* If DR_STEP = (unsigned int) _37 * 4;
> + Extract _37 and 4, explicit MULT_EXPR. */
> +
> + /* 1. step_stride = (unsigned int) _37. */
> + tree step_stride = make_ssa_name (create_tmp_var (sizetype));
> + assign = gimple_build_assign (
> + step_stride, TREE_OPERAND (TREE_OPERAND (DR_STEP (dr), 0), 0));
> + gsi_insert_before (gsi, assign, GSI_SAME_STMT);
> +
> + /* 2. step = step_stride * 4. */
> + tree step_align = TREE_OPERAND (TREE_OPERAND (DR_STEP (dr), 0), 1);
> + tree step = make_ssa_name (create_tmp_var (sizetype));
> + assign
> + = gimple_build_assign (step, fold_build2 (MULT_EXPR, sizetype,
> + step_stride, step_align));
> + gsi_insert_before (gsi, assign, GSI_SAME_STMT);
> +
> + /* 3. tmp = step * loop_len. */
> + tmp = make_ssa_name (create_tmp_var (sizetype));
> + assign = gimple_build_assign (tmp, fold_build2 (MULT_EXPR, sizetype,
> + step, loop_len));
> + gsi_insert_before (gsi, assign, GSI_SAME_STMT);
> + }
> tree bump = make_temp_ssa_name (sizetype, NULL, "ivtmp");
> - gassign *assign = gimple_build_assign (bump, tmp);
instead of
tree bump = make_temp_ssa_name (sizetype, NULL, "ivtmp");
gassign *assign = gimple_build_assign (bump, tmp);
you can simply do
tree bump = force_gimple_operand_gsi (gsi, tmp, true, NULL_TREE,
true, GSI_SAME_STMT);
That's all that is needed.
Richard.
> + assign = gimple_build_assign (bump, tmp);
> gsi_insert_before (gsi, assign, GSI_SAME_STMT);
> *dataref_bump = bump;
> }
>
Thank you so much.
I have sent V2:
https://gcc.gnu.org/pipermail/gcc-patches/2023-July/623734.html
which is working fine for both stride = constant and variable.
Could you take a look at it?
Thanks.
juzhe.zhong@rivai.ai
From: Richard Biener
Date: 2023-07-06 14:43
To: Ju-Zhe Zhong
CC: gcc-patches; richard.sandiford
Subject: Re: [PATCH] VECT: Fix ICE of variable stride on strieded load/store with SELECT_VL loop control.
On Thu, 6 Jul 2023, juzhe.zhong@rivai.ai wrote:
> From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
>
> Hi, Richi.
>
> Sorry for making mistake on LEN_MASK_GATHER_LOAD/LEN_MASK_SCATTER_STORE
> with SELECT_VL loop control.
>
> Consider this following case:
> #define TEST_LOOP(DATA_TYPE, BITS) \
> void __attribute__ ((noinline, noclone)) \
> f_##DATA_TYPE##_##BITS (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
> INDEX##BITS stride, INDEX##BITS n) \
> { \
> for (INDEX##BITS i = 0; i < n; ++i) \
> dest[i] += src[i * stride]; \
> }
>
> When "stride" is a constant, current flow works fine.
> However, when "stride" is a variable. It causes an ICE:
> # vectp_src.67_85 = PHI <vectp_src.67_86(6), src_21(D)(12)>
> ...
> _96 = .SELECT_VL (ivtmp_94, 4);
> ...
> ivtmp_78 = ((sizetype) _39 * (sizetype) _96) * 4;
> vect__11.69_87 = .LEN_MASK_GATHER_LOAD (vectp_src.67_85, _84, 4, { 0, 0, 0, 0 }, { -1, -1, -1, -1 }, _96, 0);
> ...
> vectp_src.67_86 = vectp_src.67_85 + ivtmp_78;
>
> Becase the IR: ivtmp_78 = ((sizetype) _39 * (sizetype) _96) * 4;
>
> Instead, I split the IR into:
>
> step_stride = _39
> step = step_stride * 4
> ivtmp_78 = step * _96
>
> I don't think this patch's code is elegant enough, could you help me refine these codes?
>
> Thanks.
>
> gcc/ChangeLog:
>
> * tree-vect-stmts.cc (vect_get_strided_load_store_ops): Fix ICE.
>
> ---
> gcc/tree-vect-stmts.cc | 38 +++++++++++++++++++++++++++++++++-----
> 1 file changed, 33 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index c10a4be60eb..12d1b0f1ac0 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -3172,12 +3172,40 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
> vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
> tree loop_len
> = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
> - tree tmp
> - = fold_build2 (MULT_EXPR, sizetype,
> - fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
> - loop_len);
> + tree tmp;
> + gassign *assign;
> +
> + if (TREE_CODE (DR_STEP (dr)) == INTEGER_CST)
> + tmp = fold_build2 (MULT_EXPR, sizetype,
> + fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
> + loop_len);
> + else
> + {
> + /* If DR_STEP = (unsigned int) _37 * 4;
> + Extract _37 and 4, explicit MULT_EXPR. */
> +
> + /* 1. step_stride = (unsigned int) _37. */
> + tree step_stride = make_ssa_name (create_tmp_var (sizetype));
> + assign = gimple_build_assign (
> + step_stride, TREE_OPERAND (TREE_OPERAND (DR_STEP (dr), 0), 0));
> + gsi_insert_before (gsi, assign, GSI_SAME_STMT);
> +
> + /* 2. step = step_stride * 4. */
> + tree step_align = TREE_OPERAND (TREE_OPERAND (DR_STEP (dr), 0), 1);
> + tree step = make_ssa_name (create_tmp_var (sizetype));
> + assign
> + = gimple_build_assign (step, fold_build2 (MULT_EXPR, sizetype,
> + step_stride, step_align));
> + gsi_insert_before (gsi, assign, GSI_SAME_STMT);
> +
> + /* 3. tmp = step * loop_len. */
> + tmp = make_ssa_name (create_tmp_var (sizetype));
> + assign = gimple_build_assign (tmp, fold_build2 (MULT_EXPR, sizetype,
> + step, loop_len));
> + gsi_insert_before (gsi, assign, GSI_SAME_STMT);
> + }
> tree bump = make_temp_ssa_name (sizetype, NULL, "ivtmp");
> - gassign *assign = gimple_build_assign (bump, tmp);
instead of
tree bump = make_temp_ssa_name (sizetype, NULL, "ivtmp");
gassign *assign = gimple_build_assign (bump, tmp);
you can simply do
tree bump = force_gimple_operand_gsi (gsi, tmp, true, NULL_TREE,
true, GSI_SAME_STMT);
That's all that is needed.
Richard.
> + assign = gimple_build_assign (bump, tmp);
> gsi_insert_before (gsi, assign, GSI_SAME_STMT);
> *dataref_bump = bump;
> }
>
--
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)
@@ -3172,12 +3172,40 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
tree loop_len
= vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
- tree tmp
- = fold_build2 (MULT_EXPR, sizetype,
- fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
- loop_len);
+ tree tmp;
+ gassign *assign;
+
+ if (TREE_CODE (DR_STEP (dr)) == INTEGER_CST)
+ tmp = fold_build2 (MULT_EXPR, sizetype,
+ fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
+ loop_len);
+ else
+ {
+ /* If DR_STEP = (unsigned int) _37 * 4;
+ Extract _37 and 4, explicit MULT_EXPR. */
+
+ /* 1. step_stride = (unsigned int) _37. */
+ tree step_stride = make_ssa_name (create_tmp_var (sizetype));
+ assign = gimple_build_assign (
+ step_stride, TREE_OPERAND (TREE_OPERAND (DR_STEP (dr), 0), 0));
+ gsi_insert_before (gsi, assign, GSI_SAME_STMT);
+
+ /* 2. step = step_stride * 4. */
+ tree step_align = TREE_OPERAND (TREE_OPERAND (DR_STEP (dr), 0), 1);
+ tree step = make_ssa_name (create_tmp_var (sizetype));
+ assign
+ = gimple_build_assign (step, fold_build2 (MULT_EXPR, sizetype,
+ step_stride, step_align));
+ gsi_insert_before (gsi, assign, GSI_SAME_STMT);
+
+ /* 3. tmp = step * loop_len. */
+ tmp = make_ssa_name (create_tmp_var (sizetype));
+ assign = gimple_build_assign (tmp, fold_build2 (MULT_EXPR, sizetype,
+ step, loop_len));
+ gsi_insert_before (gsi, assign, GSI_SAME_STMT);
+ }
tree bump = make_temp_ssa_name (sizetype, NULL, "ivtmp");
- gassign *assign = gimple_build_assign (bump, tmp);
+ assign = gimple_build_assign (bump, tmp);
gsi_insert_before (gsi, assign, GSI_SAME_STMT);
*dataref_bump = bump;
}