vect: Replace DR_GROUP_STORE_COUNT with DR_GROUP_LAST_ELEMENT

Message ID 31596d79-c607-8180-3399-5013c1b53aef@linux.ibm.com
State Accepted
Headers
Series vect: Replace DR_GROUP_STORE_COUNT with DR_GROUP_LAST_ELEMENT |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

Kewen.Lin Aug. 22, 2023, 8:44 a.m. UTC
  Hi,

Now we use DR_GROUP_STORE_COUNT to record how many stores
in a group have been transformed and only do the actual
transform when encountering the last one.  I'm making
patches to move costing next to the transform code, it's
awkward to use this DR_GROUP_STORE_COUNT for both costing
and transforming.  This patch is to introduce last_element
to record the last element to be transformed in the group
rather than to sum up the store number we have seen, then
we can only check the given stmt is the last or not.  It
can make it work simply for both costing and transforming.

Bootstrapped and regtested on x86_64-redhat-linux,
aarch64-linux-gnu and powerpc64{,le}-linux-gnu.

Is it ok for trunk?

BR,
Kewen
-----

gcc/ChangeLog:

	* tree-vect-data-refs.cc (vect_set_group_last_element): New function.
	(vect_analyze_group_access): Call new function
	vect_set_group_last_element.
	* tree-vect-stmts.cc (vectorizable_store): Replace DR_GROUP_STORE_COUNT
	uses with DR_GROUP_LAST_ELEMENT.
	(vect_transform_stmt): Likewise.
	* tree-vect-slp.cc (vect_split_slp_store_group): Likewise.
	(vect_build_slp_instance): Likewise.
	* tree-vectorizer.h (DR_GROUP_LAST_ELEMENT): New macro.
	(DR_GROUP_STORE_COUNT): Remove.
	(class _stmt_vec_info::store_count): Remove.
	(class _stmt_vec_info::last_element): New class member.
	(vect_set_group_last_element): New function declaration.
---
 gcc/tree-vect-data-refs.cc | 30 ++++++++++++++++++++++++++++++
 gcc/tree-vect-slp.cc       | 13 +++++++++----
 gcc/tree-vect-stmts.cc     |  9 +++------
 gcc/tree-vectorizer.h      | 12 +++++++-----
 4 files changed, 49 insertions(+), 15 deletions(-)

--
2.31.1
  

Comments

Richard Biener Aug. 22, 2023, 12:17 p.m. UTC | #1
On Tue, Aug 22, 2023 at 10:44 AM Kewen.Lin <linkw@linux.ibm.com> wrote:
>
> Hi,
>
> Now we use DR_GROUP_STORE_COUNT to record how many stores
> in a group have been transformed and only do the actual
> transform when encountering the last one.  I'm making
> patches to move costing next to the transform code, it's
> awkward to use this DR_GROUP_STORE_COUNT for both costing
> and transforming.  This patch is to introduce last_element
> to record the last element to be transformed in the group
> rather than to sum up the store number we have seen, then
> we can only check the given stmt is the last or not.  It
> can make it work simply for both costing and transforming.
>
> Bootstrapped and regtested on x86_64-redhat-linux,
> aarch64-linux-gnu and powerpc64{,le}-linux-gnu.
>
> Is it ok for trunk?

This is all (existing) gross, so ... can't we do sth like the following
instead?  Going to test this further besides the quick single
testcase I verified.

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 33f62b77710..67de19d9ce5 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -8437,16 +8437,6 @@ vectorizable_store (vec_info *vinfo,
       /* FORNOW */
       gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));

-      /* We vectorize all the stmts of the interleaving group when we
-        reach the last stmt in the group.  */
-      if (DR_GROUP_STORE_COUNT (first_stmt_info)
-         < DR_GROUP_SIZE (first_stmt_info)
-         && !slp)
-       {
-         *vec_stmt = NULL;
-         return true;
-       }
-
       if (slp)
         {
           grouped_store = false;
@@ -12487,21 +12477,21 @@ vect_transform_stmt (vec_info *vinfo,
       break;

     case store_vec_info_type:
-      done = vectorizable_store (vinfo, stmt_info,
-                                gsi, &vec_stmt, slp_node, NULL);
-      gcc_assert (done);
-      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
+      if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
+         && !slp_node
+         && DR_GROUP_NEXT_ELEMENT (stmt_info))
+       /* In case of interleaving, the whole chain is vectorized when the
+          last store in the chain is reached.  Store stmts before the last
+          one are skipped, and there vec_stmt_info shouldn't be freed
+          meanwhile.  */
+       ;
+      else
        {
-         /* In case of interleaving, the whole chain is vectorized when the
-            last store in the chain is reached.  Store stmts before the last
-            one are skipped, and there vec_stmt_info shouldn't be freed
-            meanwhile.  */
-         stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
-         if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
-           is_store = true;
+         done = vectorizable_store (vinfo, stmt_info,
+                                    gsi, &vec_stmt, slp_node, NULL);
+         gcc_assert (done);
+         is_store = true;
        }
-      else
-       is_store = true;
       break;

     case condition_vec_info_type:


> BR,
> Kewen
> -----
>
> gcc/ChangeLog:
>
>         * tree-vect-data-refs.cc (vect_set_group_last_element): New function.
>         (vect_analyze_group_access): Call new function
>         vect_set_group_last_element.
>         * tree-vect-stmts.cc (vectorizable_store): Replace DR_GROUP_STORE_COUNT
>         uses with DR_GROUP_LAST_ELEMENT.
>         (vect_transform_stmt): Likewise.
>         * tree-vect-slp.cc (vect_split_slp_store_group): Likewise.
>         (vect_build_slp_instance): Likewise.
>         * tree-vectorizer.h (DR_GROUP_LAST_ELEMENT): New macro.
>         (DR_GROUP_STORE_COUNT): Remove.
>         (class _stmt_vec_info::store_count): Remove.
>         (class _stmt_vec_info::last_element): New class member.
>         (vect_set_group_last_element): New function declaration.
> ---
>  gcc/tree-vect-data-refs.cc | 30 ++++++++++++++++++++++++++++++
>  gcc/tree-vect-slp.cc       | 13 +++++++++----
>  gcc/tree-vect-stmts.cc     |  9 +++------
>  gcc/tree-vectorizer.h      | 12 +++++++-----
>  4 files changed, 49 insertions(+), 15 deletions(-)
>
> diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
> index 3e9a284666c..c4a495431d5 100644
> --- a/gcc/tree-vect-data-refs.cc
> +++ b/gcc/tree-vect-data-refs.cc
> @@ -2832,6 +2832,33 @@ vect_analyze_group_access_1 (vec_info *vinfo, dr_vec_info *dr_info)
>    return true;
>  }
>
> +/* Given vectorization information VINFO, set the last element in the
> +   group led by FIRST_STMT_INFO.  For now, it's only used for loop
> +   vectorization and stores, since for loop-vect the grouped stores
> +   are only transformed till encounting its last one.  */
> +
> +void
> +vect_set_group_last_element (vec_info *vinfo, stmt_vec_info first_stmt_info)
> +{
> +  if (first_stmt_info
> +      && is_a<loop_vec_info> (vinfo)
> +      && DR_IS_WRITE (STMT_VINFO_DATA_REF (first_stmt_info)))
> +    {
> +      stmt_vec_info stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
> +      stmt_vec_info last_stmt_info = first_stmt_info;
> +      while (stmt_info)
> +       {
> +         gimple *stmt = stmt_info->stmt;
> +         gimple *last_stmt = last_stmt_info->stmt;
> +         gcc_assert (gimple_bb (stmt) == gimple_bb (last_stmt));
> +         if (gimple_uid (stmt) > gimple_uid (last_stmt))
> +           last_stmt_info = stmt_info;
> +         stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
> +       }
> +      DR_GROUP_LAST_ELEMENT (first_stmt_info) = last_stmt_info;
> +    }
> +}
> +
>  /* Analyze groups of accesses: check that DR_INFO belongs to a group of
>     accesses of legal size, step, etc.  Detect gaps, single element
>     interleaving, and other special cases. Set grouped access info.
> @@ -2853,6 +2880,9 @@ vect_analyze_group_access (vec_info *vinfo, dr_vec_info *dr_info)
>         }
>        return false;
>      }
> +
> +  stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (dr_info->stmt);
> +  vect_set_group_last_element (vinfo, first_stmt_info);
>    return true;
>  }
>
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index 89c3216afac..e9b64efe125 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -2827,7 +2827,8 @@ vect_find_first_scalar_stmt_in_slp (slp_tree node)
>     Return the first stmt in the second group.  */
>
>  static stmt_vec_info
> -vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
> +vect_split_slp_store_group (vec_info *vinfo, stmt_vec_info first_vinfo,
> +                           unsigned group1_size)
>  {
>    gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
>    gcc_assert (group1_size > 0);
> @@ -2860,6 +2861,9 @@ vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
>    /* DR_GROUP_GAP of the first group now has to skip over the second group too.  */
>    DR_GROUP_GAP (first_vinfo) += group2_size;
>
> +  vect_set_group_last_element (vinfo, first_vinfo);
> +  vect_set_group_last_element (vinfo, group2);
> +
>    if (dump_enabled_p ())
>      dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
>                      group1_size, group2_size);
> @@ -3321,7 +3325,7 @@ vect_build_slp_instance (vec_info *vinfo,
>               if (dump_enabled_p ())
>                 dump_printf_loc (MSG_NOTE, vect_location,
>                                  "Splitting SLP group at stmt %u\n", i);
> -             stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
> +             stmt_vec_info rest = vect_split_slp_store_group (vinfo, stmt_info,
>                                                                group1_size);
>               bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info,
>                                                     kind, max_tree_size,
> @@ -3334,7 +3338,8 @@ vect_build_slp_instance (vec_info *vinfo,
>                       || i - group1_size > 1))
>                 {
>                   stmt_vec_info rest2 = rest;
> -                 rest = vect_split_slp_store_group (rest, i - group1_size);
> +                 rest = vect_split_slp_store_group (vinfo, rest,
> +                                                    i - group1_size);
>                   if (i - group1_size > 1)
>                     res |= vect_analyze_slp_instance (vinfo, bst_map, rest2,
>                                                       kind, max_tree_size,
> @@ -3361,7 +3366,7 @@ vect_build_slp_instance (vec_info *vinfo,
>             dump_printf_loc (MSG_NOTE, vect_location,
>                              "Splitting SLP group at stmt %u\n", i);
>
> -         stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
> +         stmt_vec_info rest = vect_split_slp_store_group (vinfo, stmt_info,
>                                                            group1_size);
>           /* Loop vectorization cannot handle gaps in stores, make sure
>              the split group appears as strided.  */
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 33f62b77710..1580a396301 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -8429,9 +8429,6 @@ vectorizable_store (vec_info *vinfo,
>    else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
>      return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
>
> -  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
> -    DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
> -
>    if (grouped_store)
>      {
>        /* FORNOW */
> @@ -8439,8 +8436,8 @@ vectorizable_store (vec_info *vinfo,
>
>        /* We vectorize all the stmts of the interleaving group when we
>          reach the last stmt in the group.  */
> -      if (DR_GROUP_STORE_COUNT (first_stmt_info)
> -         < DR_GROUP_SIZE (first_stmt_info)
> +      if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
> +         && stmt_info != DR_GROUP_LAST_ELEMENT (first_stmt_info)
>           && !slp)
>         {
>           *vec_stmt = NULL;
> @@ -12497,7 +12494,7 @@ vect_transform_stmt (vec_info *vinfo,
>              one are skipped, and there vec_stmt_info shouldn't be freed
>              meanwhile.  */
>           stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
> -         if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
> +         if (stmt_info == DR_GROUP_LAST_ELEMENT (group_info))
>             is_store = true;
>         }
>        else
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 53a3d78d545..6817e113b56 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -1285,11 +1285,12 @@ public:
>    stmt_vec_info first_element;
>    /* Pointer to the next element in the group.  */
>    stmt_vec_info next_element;
> +  /* Pointer to the last element in the group, for now it's only used
> +     for loop-vect stores since they only get transformed till the
> +     last one is being transformed.  */
> +  stmt_vec_info last_element;
>    /* The size of the group.  */
>    unsigned int size;
> -  /* For stores, number of stores from this group seen. We vectorize the last
> -     one.  */
> -  unsigned int store_count;
>    /* For loads only, the gap from the previous load. For consecutive loads, GAP
>       is 1.  */
>    unsigned int gap;
> @@ -1500,10 +1501,10 @@ struct gather_scatter_info {
>    (gcc_checking_assert ((S)->dr_aux.dr), (S)->first_element)
>  #define DR_GROUP_NEXT_ELEMENT(S) \
>    (gcc_checking_assert ((S)->dr_aux.dr), (S)->next_element)
> +#define DR_GROUP_LAST_ELEMENT(S) \
> +  (gcc_checking_assert ((S)->dr_aux.dr), (S)->last_element)
>  #define DR_GROUP_SIZE(S) \
>    (gcc_checking_assert ((S)->dr_aux.dr), (S)->size)
> -#define DR_GROUP_STORE_COUNT(S) \
> -  (gcc_checking_assert ((S)->dr_aux.dr), (S)->store_count)
>  #define DR_GROUP_GAP(S) \
>    (gcc_checking_assert ((S)->dr_aux.dr), (S)->gap)
>
> @@ -2317,6 +2318,7 @@ extern tree vect_get_new_ssa_name (tree, enum vect_var_kind,
>  extern tree vect_create_addr_base_for_vector_ref (vec_info *,
>                                                   stmt_vec_info, gimple_seq *,
>                                                   tree);
> +extern void vect_set_group_last_element (vec_info *, stmt_vec_info);
>
>  /* In tree-vect-loop.cc.  */
>  extern tree neutral_op_for_reduction (tree, code_helper, tree);
> --
> 2.31.1
  
Kewen.Lin Aug. 23, 2023, 2:05 a.m. UTC | #2
Hi Richi,

on 2023/8/22 20:17, Richard Biener wrote:
> On Tue, Aug 22, 2023 at 10:44 AM Kewen.Lin <linkw@linux.ibm.com> wrote:
>>
>> Hi,
>>
>> Now we use DR_GROUP_STORE_COUNT to record how many stores
>> in a group have been transformed and only do the actual
>> transform when encountering the last one.  I'm making
>> patches to move costing next to the transform code, it's
>> awkward to use this DR_GROUP_STORE_COUNT for both costing
>> and transforming.  This patch is to introduce last_element
>> to record the last element to be transformed in the group
>> rather than to sum up the store number we have seen, then
>> we can only check the given stmt is the last or not.  It
>> can make it work simply for both costing and transforming.
>>
>> Bootstrapped and regtested on x86_64-redhat-linux,
>> aarch64-linux-gnu and powerpc64{,le}-linux-gnu.
>>
>> Is it ok for trunk?
> 
> This is all (existing) gross, so ... can't we do sth like the following
> instead?  Going to test this further besides the quick single
> testcase I verified.

I just realized that dealing with this in vect_transform_stmt is super
neat as you questioned and posted, thanks a lot for pushing commit
r14-3383-g2c27600fa79431 for this!

BR,
Kewen


> 
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 33f62b77710..67de19d9ce5 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -8437,16 +8437,6 @@ vectorizable_store (vec_info *vinfo,
>        /* FORNOW */
>        gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
> 
> -      /* We vectorize all the stmts of the interleaving group when we
> -        reach the last stmt in the group.  */
> -      if (DR_GROUP_STORE_COUNT (first_stmt_info)
> -         < DR_GROUP_SIZE (first_stmt_info)
> -         && !slp)
> -       {
> -         *vec_stmt = NULL;
> -         return true;
> -       }
> -
>        if (slp)
>          {
>            grouped_store = false;
> @@ -12487,21 +12477,21 @@ vect_transform_stmt (vec_info *vinfo,
>        break;
> 
>      case store_vec_info_type:
> -      done = vectorizable_store (vinfo, stmt_info,
> -                                gsi, &vec_stmt, slp_node, NULL);
> -      gcc_assert (done);
> -      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
> +      if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
> +         && !slp_node
> +         && DR_GROUP_NEXT_ELEMENT (stmt_info))
> +       /* In case of interleaving, the whole chain is vectorized when the
> +          last store in the chain is reached.  Store stmts before the last
> +          one are skipped, and there vec_stmt_info shouldn't be freed
> +          meanwhile.  */
> +       ;
> +      else
>         {
> -         /* In case of interleaving, the whole chain is vectorized when the
> -            last store in the chain is reached.  Store stmts before the last
> -            one are skipped, and there vec_stmt_info shouldn't be freed
> -            meanwhile.  */
> -         stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
> -         if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
> -           is_store = true;
> +         done = vectorizable_store (vinfo, stmt_info,
> +                                    gsi, &vec_stmt, slp_node, NULL);
> +         gcc_assert (done);
> +         is_store = true;
>         }
> -      else
> -       is_store = true;
>        break;
> 
>      case condition_vec_info_type:
> 
> 
>> BR,
>> Kewen
>> -----
>>
>> gcc/ChangeLog:
>>
>>         * tree-vect-data-refs.cc (vect_set_group_last_element): New function.
>>         (vect_analyze_group_access): Call new function
>>         vect_set_group_last_element.
>>         * tree-vect-stmts.cc (vectorizable_store): Replace DR_GROUP_STORE_COUNT
>>         uses with DR_GROUP_LAST_ELEMENT.
>>         (vect_transform_stmt): Likewise.
>>         * tree-vect-slp.cc (vect_split_slp_store_group): Likewise.
>>         (vect_build_slp_instance): Likewise.
>>         * tree-vectorizer.h (DR_GROUP_LAST_ELEMENT): New macro.
>>         (DR_GROUP_STORE_COUNT): Remove.
>>         (class _stmt_vec_info::store_count): Remove.
>>         (class _stmt_vec_info::last_element): New class member.
>>         (vect_set_group_last_element): New function declaration.
>> ---
>>  gcc/tree-vect-data-refs.cc | 30 ++++++++++++++++++++++++++++++
>>  gcc/tree-vect-slp.cc       | 13 +++++++++----
>>  gcc/tree-vect-stmts.cc     |  9 +++------
>>  gcc/tree-vectorizer.h      | 12 +++++++-----
>>  4 files changed, 49 insertions(+), 15 deletions(-)
>>
>> diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
>> index 3e9a284666c..c4a495431d5 100644
>> --- a/gcc/tree-vect-data-refs.cc
>> +++ b/gcc/tree-vect-data-refs.cc
>> @@ -2832,6 +2832,33 @@ vect_analyze_group_access_1 (vec_info *vinfo, dr_vec_info *dr_info)
>>    return true;
>>  }
>>
>> +/* Given vectorization information VINFO, set the last element in the
>> +   group led by FIRST_STMT_INFO.  For now, it's only used for loop
>> +   vectorization and stores, since for loop-vect the grouped stores
>> +   are only transformed till encounting its last one.  */
>> +
>> +void
>> +vect_set_group_last_element (vec_info *vinfo, stmt_vec_info first_stmt_info)
>> +{
>> +  if (first_stmt_info
>> +      && is_a<loop_vec_info> (vinfo)
>> +      && DR_IS_WRITE (STMT_VINFO_DATA_REF (first_stmt_info)))
>> +    {
>> +      stmt_vec_info stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
>> +      stmt_vec_info last_stmt_info = first_stmt_info;
>> +      while (stmt_info)
>> +       {
>> +         gimple *stmt = stmt_info->stmt;
>> +         gimple *last_stmt = last_stmt_info->stmt;
>> +         gcc_assert (gimple_bb (stmt) == gimple_bb (last_stmt));
>> +         if (gimple_uid (stmt) > gimple_uid (last_stmt))
>> +           last_stmt_info = stmt_info;
>> +         stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
>> +       }
>> +      DR_GROUP_LAST_ELEMENT (first_stmt_info) = last_stmt_info;
>> +    }
>> +}
>> +
>>  /* Analyze groups of accesses: check that DR_INFO belongs to a group of
>>     accesses of legal size, step, etc.  Detect gaps, single element
>>     interleaving, and other special cases. Set grouped access info.
>> @@ -2853,6 +2880,9 @@ vect_analyze_group_access (vec_info *vinfo, dr_vec_info *dr_info)
>>         }
>>        return false;
>>      }
>> +
>> +  stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (dr_info->stmt);
>> +  vect_set_group_last_element (vinfo, first_stmt_info);
>>    return true;
>>  }
>>
>> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
>> index 89c3216afac..e9b64efe125 100644
>> --- a/gcc/tree-vect-slp.cc
>> +++ b/gcc/tree-vect-slp.cc
>> @@ -2827,7 +2827,8 @@ vect_find_first_scalar_stmt_in_slp (slp_tree node)
>>     Return the first stmt in the second group.  */
>>
>>  static stmt_vec_info
>> -vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
>> +vect_split_slp_store_group (vec_info *vinfo, stmt_vec_info first_vinfo,
>> +                           unsigned group1_size)
>>  {
>>    gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
>>    gcc_assert (group1_size > 0);
>> @@ -2860,6 +2861,9 @@ vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
>>    /* DR_GROUP_GAP of the first group now has to skip over the second group too.  */
>>    DR_GROUP_GAP (first_vinfo) += group2_size;
>>
>> +  vect_set_group_last_element (vinfo, first_vinfo);
>> +  vect_set_group_last_element (vinfo, group2);
>> +
>>    if (dump_enabled_p ())
>>      dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
>>                      group1_size, group2_size);
>> @@ -3321,7 +3325,7 @@ vect_build_slp_instance (vec_info *vinfo,
>>               if (dump_enabled_p ())
>>                 dump_printf_loc (MSG_NOTE, vect_location,
>>                                  "Splitting SLP group at stmt %u\n", i);
>> -             stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
>> +             stmt_vec_info rest = vect_split_slp_store_group (vinfo, stmt_info,
>>                                                                group1_size);
>>               bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info,
>>                                                     kind, max_tree_size,
>> @@ -3334,7 +3338,8 @@ vect_build_slp_instance (vec_info *vinfo,
>>                       || i - group1_size > 1))
>>                 {
>>                   stmt_vec_info rest2 = rest;
>> -                 rest = vect_split_slp_store_group (rest, i - group1_size);
>> +                 rest = vect_split_slp_store_group (vinfo, rest,
>> +                                                    i - group1_size);
>>                   if (i - group1_size > 1)
>>                     res |= vect_analyze_slp_instance (vinfo, bst_map, rest2,
>>                                                       kind, max_tree_size,
>> @@ -3361,7 +3366,7 @@ vect_build_slp_instance (vec_info *vinfo,
>>             dump_printf_loc (MSG_NOTE, vect_location,
>>                              "Splitting SLP group at stmt %u\n", i);
>>
>> -         stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
>> +         stmt_vec_info rest = vect_split_slp_store_group (vinfo, stmt_info,
>>                                                            group1_size);
>>           /* Loop vectorization cannot handle gaps in stores, make sure
>>              the split group appears as strided.  */
>> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
>> index 33f62b77710..1580a396301 100644
>> --- a/gcc/tree-vect-stmts.cc
>> +++ b/gcc/tree-vect-stmts.cc
>> @@ -8429,9 +8429,6 @@ vectorizable_store (vec_info *vinfo,
>>    else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
>>      return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
>>
>> -  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
>> -    DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
>> -
>>    if (grouped_store)
>>      {
>>        /* FORNOW */
>> @@ -8439,8 +8436,8 @@ vectorizable_store (vec_info *vinfo,
>>
>>        /* We vectorize all the stmts of the interleaving group when we
>>          reach the last stmt in the group.  */
>> -      if (DR_GROUP_STORE_COUNT (first_stmt_info)
>> -         < DR_GROUP_SIZE (first_stmt_info)
>> +      if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
>> +         && stmt_info != DR_GROUP_LAST_ELEMENT (first_stmt_info)
>>           && !slp)
>>         {
>>           *vec_stmt = NULL;
>> @@ -12497,7 +12494,7 @@ vect_transform_stmt (vec_info *vinfo,
>>              one are skipped, and there vec_stmt_info shouldn't be freed
>>              meanwhile.  */
>>           stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
>> -         if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
>> +         if (stmt_info == DR_GROUP_LAST_ELEMENT (group_info))
>>             is_store = true;
>>         }
>>        else
>> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
>> index 53a3d78d545..6817e113b56 100644
>> --- a/gcc/tree-vectorizer.h
>> +++ b/gcc/tree-vectorizer.h
>> @@ -1285,11 +1285,12 @@ public:
>>    stmt_vec_info first_element;
>>    /* Pointer to the next element in the group.  */
>>    stmt_vec_info next_element;
>> +  /* Pointer to the last element in the group, for now it's only used
>> +     for loop-vect stores since they only get transformed till the
>> +     last one is being transformed.  */
>> +  stmt_vec_info last_element;
>>    /* The size of the group.  */
>>    unsigned int size;
>> -  /* For stores, number of stores from this group seen. We vectorize the last
>> -     one.  */
>> -  unsigned int store_count;
>>    /* For loads only, the gap from the previous load. For consecutive loads, GAP
>>       is 1.  */
>>    unsigned int gap;
>> @@ -1500,10 +1501,10 @@ struct gather_scatter_info {
>>    (gcc_checking_assert ((S)->dr_aux.dr), (S)->first_element)
>>  #define DR_GROUP_NEXT_ELEMENT(S) \
>>    (gcc_checking_assert ((S)->dr_aux.dr), (S)->next_element)
>> +#define DR_GROUP_LAST_ELEMENT(S) \
>> +  (gcc_checking_assert ((S)->dr_aux.dr), (S)->last_element)
>>  #define DR_GROUP_SIZE(S) \
>>    (gcc_checking_assert ((S)->dr_aux.dr), (S)->size)
>> -#define DR_GROUP_STORE_COUNT(S) \
>> -  (gcc_checking_assert ((S)->dr_aux.dr), (S)->store_count)
>>  #define DR_GROUP_GAP(S) \
>>    (gcc_checking_assert ((S)->dr_aux.dr), (S)->gap)
>>
>> @@ -2317,6 +2318,7 @@ extern tree vect_get_new_ssa_name (tree, enum vect_var_kind,
>>  extern tree vect_create_addr_base_for_vector_ref (vec_info *,
>>                                                   stmt_vec_info, gimple_seq *,
>>                                                   tree);
>> +extern void vect_set_group_last_element (vec_info *, stmt_vec_info);
>>
>>  /* In tree-vect-loop.cc.  */
>>  extern tree neutral_op_for_reduction (tree, code_helper, tree);
>> --
>> 2.31.1
  

Patch

diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index 3e9a284666c..c4a495431d5 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -2832,6 +2832,33 @@  vect_analyze_group_access_1 (vec_info *vinfo, dr_vec_info *dr_info)
   return true;
 }

+/* Given vectorization information VINFO, set the last element in the
+   group led by FIRST_STMT_INFO.  For now, it's only used for loop
+   vectorization and stores, since for loop-vect the grouped stores
+   are only transformed till encounting its last one.  */
+
+void
+vect_set_group_last_element (vec_info *vinfo, stmt_vec_info first_stmt_info)
+{
+  if (first_stmt_info
+      && is_a<loop_vec_info> (vinfo)
+      && DR_IS_WRITE (STMT_VINFO_DATA_REF (first_stmt_info)))
+    {
+      stmt_vec_info stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
+      stmt_vec_info last_stmt_info = first_stmt_info;
+      while (stmt_info)
+	{
+	  gimple *stmt = stmt_info->stmt;
+	  gimple *last_stmt = last_stmt_info->stmt;
+	  gcc_assert (gimple_bb (stmt) == gimple_bb (last_stmt));
+	  if (gimple_uid (stmt) > gimple_uid (last_stmt))
+	    last_stmt_info = stmt_info;
+	  stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
+	}
+      DR_GROUP_LAST_ELEMENT (first_stmt_info) = last_stmt_info;
+    }
+}
+
 /* Analyze groups of accesses: check that DR_INFO belongs to a group of
    accesses of legal size, step, etc.  Detect gaps, single element
    interleaving, and other special cases. Set grouped access info.
@@ -2853,6 +2880,9 @@  vect_analyze_group_access (vec_info *vinfo, dr_vec_info *dr_info)
 	}
       return false;
     }
+
+  stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (dr_info->stmt);
+  vect_set_group_last_element (vinfo, first_stmt_info);
   return true;
 }

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 89c3216afac..e9b64efe125 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -2827,7 +2827,8 @@  vect_find_first_scalar_stmt_in_slp (slp_tree node)
    Return the first stmt in the second group.  */

 static stmt_vec_info
-vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
+vect_split_slp_store_group (vec_info *vinfo, stmt_vec_info first_vinfo,
+			    unsigned group1_size)
 {
   gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
   gcc_assert (group1_size > 0);
@@ -2860,6 +2861,9 @@  vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
   /* DR_GROUP_GAP of the first group now has to skip over the second group too.  */
   DR_GROUP_GAP (first_vinfo) += group2_size;

+  vect_set_group_last_element (vinfo, first_vinfo);
+  vect_set_group_last_element (vinfo, group2);
+
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
 		     group1_size, group2_size);
@@ -3321,7 +3325,7 @@  vect_build_slp_instance (vec_info *vinfo,
 	      if (dump_enabled_p ())
 		dump_printf_loc (MSG_NOTE, vect_location,
 				 "Splitting SLP group at stmt %u\n", i);
-	      stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
+	      stmt_vec_info rest = vect_split_slp_store_group (vinfo, stmt_info,
 							       group1_size);
 	      bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info,
 						    kind, max_tree_size,
@@ -3334,7 +3338,8 @@  vect_build_slp_instance (vec_info *vinfo,
 		      || i - group1_size > 1))
 		{
 		  stmt_vec_info rest2 = rest;
-		  rest = vect_split_slp_store_group (rest, i - group1_size);
+		  rest = vect_split_slp_store_group (vinfo, rest,
+						     i - group1_size);
 		  if (i - group1_size > 1)
 		    res |= vect_analyze_slp_instance (vinfo, bst_map, rest2,
 						      kind, max_tree_size,
@@ -3361,7 +3366,7 @@  vect_build_slp_instance (vec_info *vinfo,
 	    dump_printf_loc (MSG_NOTE, vect_location,
 			     "Splitting SLP group at stmt %u\n", i);

-	  stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
+	  stmt_vec_info rest = vect_split_slp_store_group (vinfo, stmt_info,
 							   group1_size);
 	  /* Loop vectorization cannot handle gaps in stores, make sure
 	     the split group appears as strided.  */
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 33f62b77710..1580a396301 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -8429,9 +8429,6 @@  vectorizable_store (vec_info *vinfo,
   else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
     return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);

-  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
-    DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
-
   if (grouped_store)
     {
       /* FORNOW */
@@ -8439,8 +8436,8 @@  vectorizable_store (vec_info *vinfo,

       /* We vectorize all the stmts of the interleaving group when we
 	 reach the last stmt in the group.  */
-      if (DR_GROUP_STORE_COUNT (first_stmt_info)
-	  < DR_GROUP_SIZE (first_stmt_info)
+      if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
+	  && stmt_info != DR_GROUP_LAST_ELEMENT (first_stmt_info)
 	  && !slp)
 	{
 	  *vec_stmt = NULL;
@@ -12497,7 +12494,7 @@  vect_transform_stmt (vec_info *vinfo,
 	     one are skipped, and there vec_stmt_info shouldn't be freed
 	     meanwhile.  */
 	  stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
-	  if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
+	  if (stmt_info == DR_GROUP_LAST_ELEMENT (group_info))
 	    is_store = true;
 	}
       else
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 53a3d78d545..6817e113b56 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1285,11 +1285,12 @@  public:
   stmt_vec_info first_element;
   /* Pointer to the next element in the group.  */
   stmt_vec_info next_element;
+  /* Pointer to the last element in the group, for now it's only used
+     for loop-vect stores since they only get transformed till the
+     last one is being transformed.  */
+  stmt_vec_info last_element;
   /* The size of the group.  */
   unsigned int size;
-  /* For stores, number of stores from this group seen. We vectorize the last
-     one.  */
-  unsigned int store_count;
   /* For loads only, the gap from the previous load. For consecutive loads, GAP
      is 1.  */
   unsigned int gap;
@@ -1500,10 +1501,10 @@  struct gather_scatter_info {
   (gcc_checking_assert ((S)->dr_aux.dr), (S)->first_element)
 #define DR_GROUP_NEXT_ELEMENT(S) \
   (gcc_checking_assert ((S)->dr_aux.dr), (S)->next_element)
+#define DR_GROUP_LAST_ELEMENT(S) \
+  (gcc_checking_assert ((S)->dr_aux.dr), (S)->last_element)
 #define DR_GROUP_SIZE(S) \
   (gcc_checking_assert ((S)->dr_aux.dr), (S)->size)
-#define DR_GROUP_STORE_COUNT(S) \
-  (gcc_checking_assert ((S)->dr_aux.dr), (S)->store_count)
 #define DR_GROUP_GAP(S) \
   (gcc_checking_assert ((S)->dr_aux.dr), (S)->gap)

@@ -2317,6 +2318,7 @@  extern tree vect_get_new_ssa_name (tree, enum vect_var_kind,
 extern tree vect_create_addr_base_for_vector_ref (vec_info *,
 						  stmt_vec_info, gimple_seq *,
 						  tree);
+extern void vect_set_group_last_element (vec_info *, stmt_vec_info);

 /* In tree-vect-loop.cc.  */
 extern tree neutral_op_for_reduction (tree, code_helper, tree);