@@ -123,7 +123,6 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! vect_scatter_store } } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target vect_scatter_store } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "vector alignment may not be reachable" 1 "vect" { target { ! vector_alignment_reachable } } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" { target { ! vector_alignment_reachable } } } } */
@@ -39,4 +39,4 @@ int main (int argc, char **argv)
return 0;
}
-/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail { ! aarch64_sve } } } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
@@ -39,4 +39,4 @@ int main (int argc, char **argv)
return 0;
}
-/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail { ! aarch64_sve } } } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
@@ -39,4 +39,4 @@ int main (int argc, char **argv)
return 0;
}
-/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail { ! aarch64_sve } } } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
@@ -36,4 +36,4 @@ int main (void)
return main1 ();
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { ! vect_scatter_store } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
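The construct the adjusted scan patterns refer to is a scatter store, i.e. a store through a vector of indices.  A minimal C sketch of such a loop follows; the function and variable names are made up for illustration and this is not the body of any of the tests touched above:

void
scatter_store (int *restrict dst, const int *restrict idx,
               const int *restrict src, int n)
{
  for (int i = 0; i < n; ++i)
    dst[idx[i]] = src[i];
}

With the rest of the patch, loops of this shape can be vectorized by scalarizing the store even on targets without native scatter support, which is why the vect_scatter_store and aarch64_sve conditions on the scans are no longer needed.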
@@ -4464,9 +4464,7 @@ vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf, bool *fatal)
&& !TREE_THIS_VOLATILE (DR_REF (dr));
bool maybe_scatter
= DR_IS_WRITE (dr)
- && !TREE_THIS_VOLATILE (DR_REF (dr))
- && (targetm.vectorize.builtin_scatter != NULL
- || supports_vec_scatter_store_p ());
+ && !TREE_THIS_VOLATILE (DR_REF (dr));
/* If target supports vector gather loads or scatter stores,
see if they can't be used. */
@@ -942,6 +942,7 @@ cfun_returns (tree decl)
static void
vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
vect_memory_access_type memory_access_type,
+ gather_scatter_info *gs_info,
dr_alignment_support alignment_support_scheme,
int misalignment,
vec_load_store_type vls_type, slp_tree slp_node,
@@ -997,8 +998,16 @@ vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
if (memory_access_type == VMAT_ELEMENTWISE
|| memory_access_type == VMAT_GATHER_SCATTER)
{
- /* N scalar stores plus extracting the elements. */
unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
+ if (memory_access_type == VMAT_GATHER_SCATTER
+ && gs_info->ifn == IFN_LAST && !gs_info->decl)
+ /* For emulated scatter N offset vector element extracts
+ (we assume the scalar scaling and ptr + offset add is consumed by
+ the store). */
+ inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
+ vec_to_scalar, stmt_info, 0,
+ vect_body);
+ /* N scalar stores plus extracting the elements. */
inside_cost += record_stmt_cost (cost_vec,
ncopies * assumed_nunits,
scalar_store, stmt_info, 0, vect_body);
@@ -1008,7 +1017,9 @@ vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
misalignment, &inside_cost, cost_vec);
if (memory_access_type == VMAT_ELEMENTWISE
- || memory_access_type == VMAT_STRIDED_SLP)
+ || memory_access_type == VMAT_STRIDED_SLP
+ || (memory_access_type == VMAT_GATHER_SCATTER
+ && gs_info->ifn == IFN_LAST && !gs_info->decl))
{
/* N scalar stores plus extracting the elements. */
unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
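As a rough worked example of the costing above (illustrative numbers, not taken from the patch): for an emulated scatter of a 4-lane vector with a 4-lane offset vector and ncopies == 1, the first hunk accounts 4 vec_to_scalar extracts for the offset elements plus 4 scalar_store operations, and the second hunk accounts another 4 vec_to_scalar extracts for the data elements, i.e. 12 statements per vector store; the scaling and the pointer plus offset additions are assumed to be folded into the scalar stores' addressing.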
@@ -2503,19 +2514,11 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
}
else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
{
- if (vls_type != VLS_LOAD)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "unsupported emulated scatter.\n");
- return false;
- }
- else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
- || !TYPE_VECTOR_SUBPARTS
- (gs_info->offset_vectype).is_constant ()
- || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
- (gs_info->offset_vectype),
- TYPE_VECTOR_SUBPARTS (vectype)))
+ if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
+ || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
+ || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
+ (gs_info->offset_vectype),
+ TYPE_VECTOR_SUBPARTS (vectype)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -7751,6 +7754,15 @@ vectorizable_store (vec_info *vinfo,
"unsupported access type for masked store.\n");
return false;
}
+ else if (memory_access_type == VMAT_GATHER_SCATTER
+ && gs_info.ifn == IFN_LAST
+ && !gs_info.decl)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "unsupported masked emulated scatter.\n");
+ return false;
+ }
}
else
{
@@ -7814,7 +7826,8 @@ vectorizable_store (vec_info *vinfo,
STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
vect_model_store_cost (vinfo, stmt_info, ncopies,
- memory_access_type, alignment_support_scheme,
+ memory_access_type, &gs_info,
+ alignment_support_scheme,
misalignment, vls_type, slp_node, cost_vec);
return true;
}
@@ -8575,7 +8588,8 @@ vectorizable_store (vec_info *vinfo,
final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
final_mask, vec_mask, gsi);
- if (memory_access_type == VMAT_GATHER_SCATTER)
+ if (memory_access_type == VMAT_GATHER_SCATTER
+ && gs_info.ifn != IFN_LAST)
{
tree scale = size_int (gs_info.scale);
gcall *call;
@@ -8592,6 +8606,60 @@ vectorizable_store (vec_info *vinfo,
new_stmt = call;
break;
}
+ else if (memory_access_type == VMAT_GATHER_SCATTER)
+ {
+ /* Emulated scatter. */
+ gcc_assert (!final_mask);
+ unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
+ unsigned HOST_WIDE_INT const_offset_nunits
+ = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
+ .to_constant ();
+ vec<constructor_elt, va_gc> *ctor_elts;
+ vec_alloc (ctor_elts, const_nunits);
+ gimple_seq stmts = NULL;
+ tree elt_type = TREE_TYPE (vectype);
+ unsigned HOST_WIDE_INT elt_size
+ = tree_to_uhwi (TYPE_SIZE (elt_type));
+ /* We support offset vectors with more elements
+ than the data vector for now. */
+ unsigned HOST_WIDE_INT factor
+ = const_offset_nunits / const_nunits;
+ vec_offset = vec_offsets[j / factor];
+ unsigned elt_offset = (j % factor) * const_nunits;
+ tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
+ tree scale = size_int (gs_info.scale);
+ align = get_object_alignment (DR_REF (first_dr_info->dr));
+ tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
+ for (unsigned k = 0; k < const_nunits; ++k)
+ {
+ /* Compute the offsetted pointer. */
+ tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
+ bitsize_int (k + elt_offset));
+ tree idx = gimple_build (&stmts, BIT_FIELD_REF,
+ idx_type, vec_offset,
+ TYPE_SIZE (idx_type), boff);
+ idx = gimple_convert (&stmts, sizetype, idx);
+ idx = gimple_build (&stmts, MULT_EXPR,
+ sizetype, idx, scale);
+ tree ptr = gimple_build (&stmts, PLUS_EXPR,
+ TREE_TYPE (dataref_ptr),
+ dataref_ptr, idx);
+ ptr = gimple_convert (&stmts, ptr_type_node, ptr);
+ /* Extract the element to be stored. */
+ tree elt = gimple_build (&stmts, BIT_FIELD_REF,
+ TREE_TYPE (vectype), vec_oprnd,
+ TYPE_SIZE (elt_type),
+ bitsize_int (k * elt_size));
+ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+ stmts = NULL;
+ tree ref = build2 (MEM_REF, ltype, ptr,
+ build_int_cst (ref_type, 0));
+ new_stmt = gimple_build_assign (ref, elt);
+ vect_finish_stmt_generation (vinfo, stmt_info,
+ new_stmt, gsi);
+ }
+ break;
+ }
if (i > 0)
/* Bump the vector pointer. */
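To make the scalarized sequence built above easier to follow, here is a minimal C-level sketch of what it amounts to for one vector store, assuming four float lanes, int offsets and a scale equal to the element size.  The function and variable names are illustrative only and do not appear in the patch; the hunk above emits the equivalent GIMPLE (BIT_FIELD_REFs of the offset and data vectors, a scaled pointer add, and a scalar MEM_REF store) once per lane:

void
emulated_scatter_v4 (float *base, const int off[4], const float data[4])
{
  for (int k = 0; k < 4; ++k)
    {
      /* Lane k of the offset vector, widened and scaled to bytes.  */
      long idx = (long) off[k] * (long) sizeof (float);
      /* Added to the byte-based data reference pointer.  */
      char *ptr = (char *) base + idx;
      /* Lane k of the data vector stored through the computed pointer.  */
      *(float *) ptr = data[k];
    }
}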