VECT: Support mask_len_strided_load/mask_len_strided_store in loop vectorize
Commit Message
This patch makes the loop vectorizer generate the strided load/store IFNs
directly when the target enables them.
Note that with this patch, a target that enables strided load/store but
lacks gather/scatter can still vectorize strided memory accesses.
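As a purely illustrative example (stride value and types hypothetical), a
loop with a runtime stride such as:

void
foo (int *restrict a, int *restrict b, int n, int stride)
{
  for (int i = 0; i < n; i++)
    a[i * stride] = b[i * stride] + 1;
}

could then be vectorized by emitting IFN_MASK_LEN_STRIDED_LOAD and
IFN_MASK_LEN_STRIDED_STORE directly, without first materializing a
{0, stride, stride*2, ...} offset vector for a gather/scatter.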
gcc/ChangeLog:
* optabs-query.cc (supports_vec_gather_load_p): Support strided load/store.
(supports_vec_scatter_store_p): Ditto.
* optabs-query.h (supports_vec_gather_load_p): Ditto.
(supports_vec_scatter_store_p): Ditto.
* tree-vect-data-refs.cc (vect_gather_scatter_fn_p): Ditto.
(vect_check_gather_scatter): Ditto.
* tree-vect-stmts.cc (check_load_store_for_partial_vectors): Ditto.
(vect_truncate_gather_scatter_offset): Ditto.
(vect_use_strided_gather_scatters_p): Ditto.
(vect_get_strided_load_store_ops): Ditto.
(vectorizable_store): Ditto.
(vectorizable_load): Ditto.
* tree-vectorizer.h (vect_gather_scatter_fn_p): Ditto.
---
gcc/optabs-query.cc | 27 ++++++++++-----
gcc/optabs-query.h | 4 +--
gcc/tree-vect-data-refs.cc | 71 ++++++++++++++++++++++++++++----------
gcc/tree-vect-stmts.cc | 46 +++++++++++++++++-------
gcc/tree-vectorizer.h | 3 +-
5 files changed, 109 insertions(+), 42 deletions(-)
Comments
Passed the x86 bootstrap and regression tests.
Pan
-----Original Message-----
From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
Sent: Tuesday, October 31, 2023 6:08 PM
To: gcc-patches@gcc.gnu.org
Cc: richard.sandiford@arm.com; rguenther@suse.de; jeffreyalaw@gmail.com; Juzhe-Zhong <juzhe.zhong@rivai.ai>
Subject: [PATCH] VECT: Support mask_len_strided_load/mask_len_strided_store in loop vectorize
diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
index 947ccef218c..ea594baf15d 100644
--- a/gcc/optabs-query.cc
+++ b/gcc/optabs-query.cc
@@ -670,14 +670,19 @@ supports_vec_convert_optab_p (optab op, machine_mode mode)
for at least one vector mode. */
bool
-supports_vec_gather_load_p (machine_mode mode)
+supports_vec_gather_load_p (machine_mode mode, bool strided_p)
{
if (!this_fn_optabs->supports_vec_gather_load[mode])
this_fn_optabs->supports_vec_gather_load[mode]
= (supports_vec_convert_optab_p (gather_load_optab, mode)
- || supports_vec_convert_optab_p (mask_gather_load_optab, mode)
- || supports_vec_convert_optab_p (mask_len_gather_load_optab, mode)
- ? 1 : -1);
+ || supports_vec_convert_optab_p (mask_gather_load_optab, mode)
+ || supports_vec_convert_optab_p (mask_len_gather_load_optab, mode)
+ || (strided_p
+ && convert_optab_handler (mask_len_strided_load_optab, mode,
+ Pmode)
+ != CODE_FOR_nothing)
+ ? 1
+ : -1);
return this_fn_optabs->supports_vec_gather_load[mode] > 0;
}
@@ -687,14 +692,20 @@ supports_vec_gather_load_p (machine_mode mode)
for at least one vector mode. */
bool
-supports_vec_scatter_store_p (machine_mode mode)
+supports_vec_scatter_store_p (machine_mode mode, bool strided_p)
{
if (!this_fn_optabs->supports_vec_scatter_store[mode])
this_fn_optabs->supports_vec_scatter_store[mode]
= (supports_vec_convert_optab_p (scatter_store_optab, mode)
- || supports_vec_convert_optab_p (mask_scatter_store_optab, mode)
- || supports_vec_convert_optab_p (mask_len_scatter_store_optab, mode)
- ? 1 : -1);
+ || supports_vec_convert_optab_p (mask_scatter_store_optab, mode)
+ || supports_vec_convert_optab_p (mask_len_scatter_store_optab,
+ mode)
+ || (strided_p
+ && convert_optab_handler (mask_len_strided_store_optab, mode,
+ Pmode)
+ != CODE_FOR_nothing)
+ ? 1
+ : -1);
return this_fn_optabs->supports_vec_scatter_store[mode] > 0;
}
diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h
index 920eb6a1b67..7c22edc5a78 100644
--- a/gcc/optabs-query.h
+++ b/gcc/optabs-query.h
@@ -191,8 +191,8 @@ bool can_compare_and_swap_p (machine_mode, bool);
bool can_atomic_exchange_p (machine_mode, bool);
bool can_atomic_load_p (machine_mode);
bool lshift_cheap_p (bool);
-bool supports_vec_gather_load_p (machine_mode = E_VOIDmode);
-bool supports_vec_scatter_store_p (machine_mode = E_VOIDmode);
+bool supports_vec_gather_load_p (machine_mode = E_VOIDmode, bool = false);
+bool supports_vec_scatter_store_p (machine_mode = E_VOIDmode, bool = false);
bool can_vec_extract (machine_mode, machine_mode);
/* Version of find_widening_optab_handler_and_mode that operates on
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index d5c9c4a11c2..d374849b0a7 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -3913,9 +3913,9 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
*IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT. */
bool
-vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
- tree vectype, tree memory_type, tree offset_type,
- int scale, internal_fn *ifn_out,
+vect_gather_scatter_fn_p (vec_info *vinfo, bool strided_p, bool read_p,
+ bool masked_p, tree vectype, tree memory_type,
+ tree offset_type, int scale, internal_fn *ifn_out,
tree *offset_vectype_out)
{
unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
@@ -3926,7 +3926,7 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
return false;
/* Work out which function we need. */
- internal_fn ifn, alt_ifn, alt_ifn2;
+ internal_fn ifn, alt_ifn, alt_ifn2, alt_ifn3;
if (read_p)
{
ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
@@ -3935,6 +3935,12 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
use MASK_LEN_GATHER_LOAD regardless whether len and
mask are valid or not. */
alt_ifn2 = IFN_MASK_LEN_GATHER_LOAD;
+ /* When the target supports MASK_LEN_STRIDED_LOAD, we can relax the
+ restrictions around the relationship of the vector offset type
+ to the loaded data by using a gather load with strided access.
+ E.g. a "gather" of N bytes with a 64-bit stride would in principle
+ be possible without needing an Nx64-bit vector offset type. */
+ alt_ifn3 = IFN_MASK_LEN_STRIDED_LOAD;
}
else
{
@@ -3944,6 +3950,12 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
use MASK_LEN_SCATTER_STORE regardless whether len and
mask are valid or not. */
alt_ifn2 = IFN_MASK_LEN_SCATTER_STORE;
+ /* When the target supports MASK_LEN_STRIDED_STORE, we can relax the
+ restrictions around the relationship of the vector offset type
+ to the stored data by using a scatter store with strided access.
+ E.g. a "scatter" of N bytes with a 64-bit stride would in principle
+ be possible without needing an Nx64-bit vector offset type. */
+ alt_ifn3 = IFN_MASK_LEN_STRIDED_STORE;
}
for (;;)
@@ -3953,8 +3965,20 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
return false;
/* Test whether the target supports this combination. */
- if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
- offset_vectype, scale))
+ /* We need not check whether the target supports the gather/scatter IFN
+ with the expected vector offset for a strided access when the target
+ itself supports the strided load/store IFN. */
+ if (strided_p
+ && internal_strided_fn_supported_p (alt_ifn3, vectype, offset_type,
+ scale))
+ {
+ *ifn_out = alt_ifn3;
+ *offset_vectype_out = offset_vectype;
+ return true;
+ }
+ else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
+ memory_type,
+ offset_vectype, scale))
{
*ifn_out = ifn;
*offset_vectype_out = offset_vectype;
@@ -4047,9 +4071,12 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
/* True if we should aim to use internal functions rather than
built-in functions. */
- bool use_ifn_p = (DR_IS_READ (dr)
- ? supports_vec_gather_load_p (TYPE_MODE (vectype))
- : supports_vec_scatter_store_p (TYPE_MODE (vectype)));
+ bool use_ifn_p
+ = (DR_IS_READ (dr)
+ ? supports_vec_gather_load_p (TYPE_MODE (vectype),
+ STMT_VINFO_STRIDED_P (stmt_info))
+ : supports_vec_scatter_store_p (TYPE_MODE (vectype),
+ STMT_VINFO_STRIDED_P (stmt_info)));
base = DR_REF (dr);
/* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF,
@@ -4196,13 +4223,17 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
/* Only treat this as a scaling operation if the target
supports it for at least some offset type. */
if (use_ifn_p
- && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
- masked_p, vectype, memory_type,
+ && !vect_gather_scatter_fn_p (loop_vinfo,
+ STMT_VINFO_STRIDED_P (stmt_info),
+ DR_IS_READ (dr), masked_p,
+ vectype, memory_type,
signed_char_type_node,
new_scale, &ifn,
&offset_vectype)
- && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
- masked_p, vectype, memory_type,
+ && !vect_gather_scatter_fn_p (loop_vinfo,
+ STMT_VINFO_STRIDED_P (stmt_info),
+ DR_IS_READ (dr), masked_p,
+ vectype, memory_type,
unsigned_char_type_node,
new_scale, &ifn,
&offset_vectype))
@@ -4225,8 +4256,10 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
if (use_ifn_p
&& TREE_CODE (off) == SSA_NAME
&& !POINTER_TYPE_P (TREE_TYPE (off))
- && vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
- masked_p, vectype, memory_type,
+ && vect_gather_scatter_fn_p (loop_vinfo,
+ STMT_VINFO_STRIDED_P (stmt_info),
+ DR_IS_READ (dr), masked_p,
+ vectype, memory_type,
TREE_TYPE (off), scale, &ifn,
&offset_vectype))
break;
@@ -4280,9 +4313,11 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
if (use_ifn_p)
{
- if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
- vectype, memory_type, offtype, scale,
- &ifn, &offset_vectype))
+ if (!vect_gather_scatter_fn_p (loop_vinfo,
+ STMT_VINFO_STRIDED_P (stmt_info),
+ DR_IS_READ (dr), masked_p, vectype,
+ memory_type, offtype, scale, &ifn,
+ &offset_vectype))
ifn = IFN_LAST;
decl = NULL_TREE;
}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index a9200767f67..8ff06bd3acb 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1506,10 +1506,15 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
internal_fn len_ifn = (is_load
? IFN_MASK_LEN_GATHER_LOAD
: IFN_MASK_LEN_SCATTER_STORE);
- if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
- gs_info->memory_type,
- gs_info->offset_vectype,
- gs_info->scale))
+ if (internal_strided_fn_p (gs_info->ifn)
+ && internal_strided_fn_supported_p (gs_info->ifn, vectype,
+ TREE_TYPE (gs_info->offset),
+ gs_info->scale))
+ vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
+ else if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
+ gs_info->memory_type,
+ gs_info->offset_vectype,
+ gs_info->scale))
vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
gs_info->memory_type,
@@ -1693,8 +1698,10 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
/* See whether the target supports the operation with an offset
no narrower than OFFSET_TYPE. */
tree memory_type = TREE_TYPE (DR_REF (dr));
- if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
- vectype, memory_type, offset_type, scale,
+ if (!vect_gather_scatter_fn_p (loop_vinfo,
+ STMT_VINFO_STRIDED_P (stmt_info),
+ DR_IS_READ (dr), masked_p, vectype,
+ memory_type, offset_type, scale,
&gs_info->ifn, &gs_info->offset_vectype)
|| gs_info->ifn == IFN_LAST)
continue;
@@ -1734,6 +1741,15 @@ vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
|| gs_info->ifn == IFN_LAST)
return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
masked_p, gs_info);
+ else if (internal_strided_fn_p (gs_info->ifn))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "using strided IFN for strided/grouped access,"
+ " scale = %d\n",
+ gs_info->scale);
+ return true;
+ }
tree old_offset_type = TREE_TYPE (gs_info->offset);
tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
@@ -3012,9 +3028,13 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
ssize_int (gs_info->scale));
step = fold_convert (offset_type, step);
- /* Create {0, X, X*2, X*3, ...}. */
- tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
- build_zero_cst (offset_type), step);
+ tree offset;
+ if (internal_strided_fn_p (gs_info->ifn))
+ offset = step;
+ else
+ /* Create {0, X, X*2, X*3, ...}. */
+ offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
+ build_zero_cst (offset_type), step);
*vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
}
@@ -9125,7 +9145,7 @@ vectorizable_store (vec_info *vinfo,
vec_offset = vec_offsets[j];
tree scale = size_int (gs_info.scale);
- if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
+ if (internal_fn_len_index (gs_info.ifn) >= 0)
{
if (loop_lens)
final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
@@ -9145,7 +9165,7 @@ vectorizable_store (vec_info *vinfo,
gcall *call;
if (final_len && final_mask)
- call = gimple_build_call_internal (IFN_MASK_LEN_SCATTER_STORE,
+ call = gimple_build_call_internal (gs_info.ifn,
7, dataref_ptr, vec_offset,
scale, vec_oprnd, final_mask,
final_len, bias);
@@ -10949,7 +10969,7 @@ vectorizable_load (vec_info *vinfo,
tree zero = build_zero_cst (vectype);
tree scale = size_int (gs_info.scale);
- if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
+ if (internal_fn_len_index (gs_info.ifn) >= 0)
{
if (loop_lens)
final_len
@@ -10973,7 +10993,7 @@ vectorizable_load (vec_info *vinfo,
gcall *call;
if (final_len && final_mask)
call
- = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 7,
+ = gimple_build_call_internal (gs_info.ifn, 7,
dataref_ptr, vec_offset,
scale, zero, final_mask,
final_len, bias);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index a4043e4a656..76bf3aa14b4 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2309,7 +2309,8 @@ extern opt_result vect_analyze_data_refs_alignment (loop_vec_info);
extern bool vect_slp_analyze_instance_alignment (vec_info *, slp_instance);
extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *);
extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info);
-extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree,
+extern bool vect_gather_scatter_fn_p (vec_info *,
+ bool, bool, bool, tree, tree,
tree, int, internal_fn *, tree *);
extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info,
gather_scatter_info *);
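For reference, a rough sketch of the call shape the patched
vectorizable_load/vectorizable_store paths build when gs_info.ifn is one of
the strided IFNs (SSA names illustrative); note the offset operand is the
scalar stride step rather than a VEC_SERIES offset vector:

  vect_1 = .MASK_LEN_STRIDED_LOAD (dataref_ptr, stride_step, scale, zero,
                                   final_mask, final_len, bias);
  .MASK_LEN_STRIDED_STORE (dataref_ptr, stride_step, scale, vec_oprnd,
                           final_mask, final_len, bias);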