@@ -44,4 +44,4 @@ int main (int argc, char **argv)
return 0;
}
-/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail { ! riscv_v } } } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
@@ -58,4 +58,4 @@ main (void)
return 0;
}
-/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" vect { target vect_gather_load_ifn } } } */
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" vect } } */
@@ -8,6 +8,7 @@ f1 (int *restrict y, int *restrict x1, int *restrict x2,
{
for (int i = 0; i < N; ++i)
{
+ /* Different base. */
y[i * 2] = x1[indices[i * 2]] + 1;
y[i * 2 + 1] = x2[indices[i * 2 + 1]] + 2;
}
@@ -18,8 +19,9 @@ f2 (int *restrict y, int *restrict x, int *restrict indices)
{
for (int i = 0; i < N; ++i)
{
- y[i * 2] = x[indices[i * 2]] + 1;
- y[i * 2 + 1] = x[indices[i * 2 + 1] * 2] + 2;
+ /* Different scale. */
+ y[i * 2] = *(int *)((char *)x + (__UINTPTR_TYPE__)indices[i * 2] * 4) + 1;
+ y[i * 2 + 1] = *(int *)((char *)x + (__UINTPTR_TYPE__)indices[i * 2 + 1] * 2) + 2;
}
}
@@ -28,9 +30,12 @@ f3 (int *restrict y, int *restrict x, int *restrict indices)
{
for (int i = 0; i < N; ++i)
{
+ /* Different type. */
y[i * 2] = x[indices[i * 2]] + 1;
- y[i * 2 + 1] = x[(unsigned int) indices[i * 2 + 1]] + 2;
+ y[i * 2 + 1] = x[((unsigned int *) indices)[i * 2 + 1]] + 2;
}
}
-/* { dg-final { scan-tree-dump-not "Loop contains only SLP stmts" vect { target vect_gather_load_ifn } } } */
+/* { dg-final { scan-tree-dump-not "Loop contains only SLP stmts" vect } } */
+/* { dg-final { scan-tree-dump "different gather base" vect { target { ! vect_gather_load_ifn } } } } */
+/* { dg-final { scan-tree-dump "different gather scale" vect { target { ! vect_gather_load_ifn } } } } */
@@ -62,4 +62,4 @@ main (void)
return 0;
}
-/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" vect { target { vect_gather_load_ifn && vect_masked_load } } } } */
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" vect { target { { vect_gather_load_ifn || avx2 } && vect_masked_load } } } } */
@@ -39,10 +39,10 @@ f3 (int *restrict y, int *restrict x, int *restrict indices)
y[i * 2] = (indices[i * 2] < N * 2
? x[indices[i * 2]] + 1
: 1);
- y[i * 2 + 1] = (indices[i * 2 + 1] < N * 2
- ? x[(unsigned int) indices[i * 2 + 1]] + 2
+ y[i * 2 + 1] = (((unsigned int *)indices)[i * 2 + 1] < N * 2
+ ? x[((unsigned int *) indices)[i * 2 + 1]] + 2
: 2);
}
}
-/* { dg-final { scan-tree-dump-not "Loop contains only SLP stmts" vect { target vect_gather_load_ifn } } } */
+/* { dg-final { scan-tree-dump-not "Loop contains only SLP stmts" vect } } */
@@ -11362,8 +11362,7 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
updated offset we set using ADVANCE. Instead we have to make sure the
reference in the data references point to the corresponding copy of
the original in the epilogue. */
- if (STMT_VINFO_MEMORY_ACCESS_TYPE (vect_stmt_to_vectorize (stmt_vinfo))
- == VMAT_GATHER_SCATTER)
+ if (STMT_VINFO_GATHER_SCATTER_P (vect_stmt_to_vectorize (stmt_vinfo)))
{
DR_REF (dr)
= simplify_replace_tree (DR_REF (dr), NULL_TREE, NULL_TREE,
@@ -11372,6 +11371,9 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
= simplify_replace_tree (DR_BASE_ADDRESS (dr), NULL_TREE, NULL_TREE,
&find_in_mapping, &mapping);
}
+ else
+ gcc_assert (STMT_VINFO_MEMORY_ACCESS_TYPE (vect_stmt_to_vectorize (stmt_vinfo))
+ != VMAT_GATHER_SCATTER);
DR_STMT (dr) = STMT_VINFO_STMT (stmt_vinfo);
stmt_vinfo->dr_aux.stmt = stmt_vinfo;
/* The vector size of the epilogue is smaller than that of the main loop
@@ -283,10 +283,11 @@ typedef struct _slp_oprnd_info
vec<tree> ops;
/* Information about the first statement, its vector def-type, type, the
operand itself in case it's constant, and an indication if it's a pattern
- stmt. */
+ stmt, and the gather/scatter info of the first statement. */
tree first_op_type;
enum vect_def_type first_dt;
bool any_pattern;
+ gather_scatter_info first_gs_info;
} *slp_oprnd_info;
@@ -609,6 +610,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
unsigned int i, number_of_oprnds;
enum vect_def_type dt = vect_uninitialized_def;
slp_oprnd_info oprnd_info;
+ gather_scatter_info gs_info;
unsigned int commutative_op = -1U;
bool first = stmt_num == 0;
@@ -660,6 +662,19 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
oprnd_info = (*oprnds_info)[i];
+ if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ {
+ gcc_assert (number_of_oprnds == 1);
+ if (!is_a <loop_vec_info> (vinfo)
+ || !vect_check_gather_scatter (stmt_info,
+ as_a <loop_vec_info> (vinfo),
+ first ? &oprnd_info->first_gs_info
+ : &gs_info))
+ return -1;
+
+ oprnd = first ? oprnd_info->first_gs_info.offset : gs_info.offset;
+ }
+
stmt_vec_info def_stmt_info;
if (!vect_is_simple_use (oprnd, vinfo, &dts[i], &def_stmt_info))
{
@@ -792,6 +807,25 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
return 1;
}
+ if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ {
+ if (!operand_equal_p (oprnd_info->first_gs_info.base,
+ gs_info.base))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Build SLP failed: different gather base\n");
+ return 1;
+ }
+ if (oprnd_info->first_gs_info.scale != gs_info.scale)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Build SLP failed: different gather scale\n");
+ return 1;
+ }
+ }
+
/* Not first stmt of the group, check that the def-stmt/s match
the def-stmt/s of the first stmt. Allow different definition
types for reduction chains: the first stmt must be a
@@ -1235,6 +1269,9 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
|| rhs_code == INDIRECT_REF
|| rhs_code == COMPONENT_REF
|| rhs_code == MEM_REF)))
+ || (ldst_p
+ && (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
+ != STMT_VINFO_GATHER_SCATTER_P (first_stmt_info)))
|| first_stmt_ldst_p != ldst_p
|| first_stmt_phi_p != phi_p)
{
@@ -1357,12 +1394,12 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))
&& rhs_code != CFN_GATHER_LOAD
&& rhs_code != CFN_MASK_GATHER_LOAD
+ && !STMT_VINFO_GATHER_SCATTER_P (stmt_info)
/* Not grouped loads are handled as externals for BB
vectorization. For loop vectorization we can handle
splats the same we handle single element interleaving. */
&& (is_a <bb_vec_info> (vinfo)
- || stmt_info != first_stmt_info
- || STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
+ || stmt_info != first_stmt_info))
{
/* Not grouped load. */
if (dump_enabled_p ())
@@ -1858,6 +1895,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)
|| gimple_call_internal_p (stmt, IFN_GATHER_LOAD)
|| gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD));
+ else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ gcc_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
else
{
*max_nunits = this_max_nunits;