@@ -5136,6 +5136,38 @@ of @code{QI} elements.
This pattern is not allowed to @code{FAIL}.
+@cindex @code{len_maskload@var{m}@var{n}} instruction pattern
+@item @samp{len_maskload@var{m}@var{n}}
+Perform a load of a vector that is predicated by both a length and a
+mask, from memory operand 1 of mode @var{m} into register operand 0.
+The length is provided in operand 2, which has whichever integer mode
+the target prefers.
+The mask is provided in register operand 3 of mode @var{n}.
+
+Operand 2 can be a variable or a constant.  The vectorization factor is
+the special constant value that represents the maximum length.
+
+Operand 3 can be a variable or a constant.  An all-ones constant is the
+special value that represents a full mask.
+
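+The rough elementwise effect is shown by the following illustrative
+sketch (not a normative definition; @var{nunits} here stands for the
+number of elements of mode @var{m}):
+
+@smallexample
+for (i = 0; i < @var{nunits}; i++)
+  if (i < operand2 && operand3[i])
+    operand0[i] = operand1[i];
+@end smallexample
+
+The sketch does not define the elements of operand 0 that are not
+selected by the length and mask.
+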
+This pattern is not allowed to @code{FAIL}.
+
+@cindex @code{len_maskstore@var{m}@var{n}} instruction pattern
+@item @samp{len_maskstore@var{m}@var{n}}
+Perform a store of a vector that is predicated by both a length and a
+mask, from register operand 1 of mode @var{m} into memory operand 0.
+The length is provided in operand 2, which has whichever integer mode
+the target prefers.
+The mask is provided in register operand 3 of mode @var{n}.
+
+Operand 2 can be a variable or a constant.  The vectorization factor is
+the special constant value that represents the maximum length.
+
+Operand 3 can be a variable or a constant.  An all-ones constant is the
+special value that represents a full mask.
+
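+The rough elementwise effect is shown by the following illustrative
+sketch (not a normative definition; @var{nunits} here stands for the
+number of elements of mode @var{m}):
+
+@smallexample
+for (i = 0; i < @var{nunits}; i++)
+  if (i < operand2 && operand3[i])
+    operand0[i] = operand1[i];
+@end smallexample
+
+Memory elements that are not selected by the length and mask are not
+written.
+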
+This pattern is not allowed to @code{FAIL}.
+
@cindex @code{vec_perm@var{m}} instruction pattern
@item @samp{vec_perm@var{m}}
Output a (variable) vector permutation. Operand 0 is the destination
@@ -376,7 +376,8 @@ main (int argc, const char **argv)
fprintf (s_file,
"/* Returns TRUE if the target supports any of the partial vector\n"
- " optabs: while_ult_optab, len_load_optab or len_store_optab,\n"
+ " optabs: while_ult_optab, len_load_optab, len_store_optab,\n"
+ " len_maskload_optab or len_maskstore_optab"
" for any mode. */\n"
"bool\npartial_vectors_supported_p (void)\n{\n");
bool any_match = false;
@@ -386,7 +387,8 @@ main (int argc, const char **argv)
{
#define CMP_NAME(N) !strncmp (p->name, (N), strlen ((N)))
if (CMP_NAME("while_ult") || CMP_NAME ("len_load")
- || CMP_NAME ("len_store"))
+ || CMP_NAME ("len_store") || CMP_NAME ("len_maskload")
+ || CMP_NAME ("len_maskstore"))
{
if (first)
fprintf (s_file, " HAVE_%s", p->name);
@@ -5370,8 +5370,8 @@ arith_overflowed_p (enum tree_code code, const_tree type,
return wi::min_precision (wres, sign) > TYPE_PRECISION (type);
}
-/* If IFN_{MASK,LEN}_LOAD/STORE call CALL is unconditional, return a MEM_REF
- for the memory it references, otherwise return null. VECTYPE is the
+/* If IFN_{MASK,LEN,LEN_MASK}_LOAD/STORE call CALL is unconditional, return a
+ MEM_REF for the memory it references, otherwise return null. VECTYPE is the
type of the memory vector. MASK_P indicates it's for MASK if true,
otherwise it's for LEN. */
@@ -5383,7 +5383,20 @@ gimple_fold_partial_load_store_mem_ref (gcall *call, tree vectype, bool mask_p)
if (!tree_fits_uhwi_p (alias_align))
return NULL_TREE;
- if (mask_p)
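+  /* A LEN_MASK access is unconditional only when the length covers the
+     whole vector and the mask is all ones.  */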
+ if (gimple_call_internal_fn (call) == IFN_LEN_MASK_LOAD
+ || gimple_call_internal_fn (call) == IFN_LEN_MASK_STORE)
+ {
+ tree basic_len = gimple_call_arg (call, 2);
+ if (!poly_int_tree_p (basic_len))
+ return NULL_TREE;
+ if (maybe_ne (tree_to_poly_uint64 (basic_len),
+ TYPE_VECTOR_SUBPARTS (vectype)))
+ return NULL_TREE;
+ tree mask = gimple_call_arg (call, 3);
+ if (!integer_all_onesp (mask))
+ return NULL_TREE;
+ }
+ else if (mask_p)
{
tree mask = gimple_call_arg (call, 2);
if (!integer_all_onesp (mask))
@@ -5409,7 +5422,7 @@ gimple_fold_partial_load_store_mem_ref (gcall *call, tree vectype, bool mask_p)
return fold_build2 (MEM_REF, vectype, ptr, offset);
}
-/* Try to fold IFN_{MASK,LEN}_LOAD call CALL. Return true on success.
+/* Try to fold IFN_{MASK,LEN,LEN_MASK}_LOAD call CALL. Return true on success.
MASK_P indicates it's for MASK if true, otherwise it's for LEN. */
static bool
@@ -5431,14 +5444,15 @@ gimple_fold_partial_load (gimple_stmt_iterator *gsi, gcall *call, bool mask_p)
return false;
}
-/* Try to fold IFN_{MASK,LEN}_STORE call CALL. Return true on success.
+/* Try to fold IFN_{MASK,LEN,LEN_MASK}_STORE call CALL. Return true on success.
MASK_P indicates it's for MASK if true, otherwise it's for LEN. */
static bool
gimple_fold_partial_store (gimple_stmt_iterator *gsi, gcall *call,
bool mask_p)
{
- tree rhs = gimple_call_arg (call, 3);
+ tree rhs
+   = gimple_call_arg (call,
+		      gimple_call_internal_fn (call) == IFN_LEN_MASK_STORE
+		      ? 4 : 3);
if (tree lhs
= gimple_fold_partial_load_store_mem_ref (call, TREE_TYPE (rhs), mask_p))
{
@@ -5659,9 +5673,11 @@ gimple_fold_call (gimple_stmt_iterator *gsi, bool inplace)
cplx_result = true;
break;
case IFN_MASK_LOAD:
+ case IFN_LEN_MASK_LOAD:
changed |= gimple_fold_partial_load (gsi, stmt, true);
break;
case IFN_MASK_STORE:
+ case IFN_LEN_MASK_STORE:
changed |= gimple_fold_partial_store (gsi, stmt, true);
break;
case IFN_LEN_LOAD:
@@ -165,6 +165,7 @@ init_internal_fns ()
#define mask_load_lanes_direct { -1, -1, false }
#define gather_load_direct { 3, 1, false }
#define len_load_direct { -1, -1, false }
+#define len_maskload_direct { -1, 3, false }
#define mask_store_direct { 3, 2, false }
#define store_lanes_direct { 0, 0, false }
#define mask_store_lanes_direct { 0, 0, false }
@@ -172,6 +173,7 @@ init_internal_fns ()
#define vec_cond_direct { 2, 0, false }
#define scatter_store_direct { 3, 1, false }
#define len_store_direct { 3, 3, false }
+#define len_maskstore_direct { 4, 3, false }
#define vec_set_direct { 3, 3, false }
#define unary_direct { 0, 0, true }
#define unary_convert_direct { -1, 0, true }
@@ -2875,6 +2877,17 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
create_input_operand (&ops[3], bias, QImode);
expand_insn (icode, 4, ops);
}
+ else if (optab == len_maskload_optab)
+ {
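+      /* For a LEN_MASK load, call argument 2 (already held in MASKT and
+         MASK here) is the length and call argument 3 is the vector mask.  */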
+ create_convert_operand_from (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)),
+ TYPE_UNSIGNED (TREE_TYPE (maskt)));
+ maskt = gimple_call_arg (stmt, 3);
+ mask = expand_normal (maskt);
+ create_input_operand (&ops[3], mask, TYPE_MODE (TREE_TYPE (maskt)));
+ icode = convert_optab_handler (optab, TYPE_MODE (type),
+ TYPE_MODE (TREE_TYPE (maskt)));
+ expand_insn (icode, 4, ops);
+ }
else
{
create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
@@ -2888,6 +2901,7 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
#define expand_mask_load_optab_fn expand_partial_load_optab_fn
#define expand_mask_load_lanes_optab_fn expand_mask_load_optab_fn
#define expand_len_load_optab_fn expand_partial_load_optab_fn
+#define expand_len_maskload_optab_fn expand_partial_load_optab_fn
/* Expand MASK_STORE{,_LANES} or LEN_STORE call STMT using optab OPTAB. */
@@ -2900,7 +2914,7 @@ expand_partial_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
insn_code icode;
maskt = gimple_call_arg (stmt, 2);
- rhs = gimple_call_arg (stmt, 3);
+ rhs = gimple_call_arg (stmt, optab == len_maskstore_optab ? 4 : 3);
type = TREE_TYPE (rhs);
lhs = expand_call_mem_ref (type, stmt, 0);
@@ -2927,6 +2941,16 @@ expand_partial_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
create_input_operand (&ops[3], bias, QImode);
expand_insn (icode, 4, ops);
}
+ else if (optab == len_maskstore_optab)
+ {
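+      /* For a LEN_MASK store, call argument 2 (already held in MASKT and
+         MASK here) is the length and call argument 3 is the vector mask.  */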
+ create_convert_operand_from (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)),
+ TYPE_UNSIGNED (TREE_TYPE (maskt)));
+ maskt = gimple_call_arg (stmt, 3);
+ mask = expand_normal (maskt);
+ create_input_operand (&ops[3], mask, TYPE_MODE (TREE_TYPE (maskt)));
+ icode = convert_optab_handler (optab, TYPE_MODE (type), GET_MODE (mask));
+ expand_insn (icode, 4, ops);
+ }
else
{
create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
@@ -2937,6 +2961,7 @@ expand_partial_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
#define expand_mask_store_optab_fn expand_partial_store_optab_fn
#define expand_mask_store_lanes_optab_fn expand_mask_store_optab_fn
#define expand_len_store_optab_fn expand_partial_store_optab_fn
+#define expand_len_maskstore_optab_fn expand_partial_store_optab_fn
/* Expand VCOND, VCONDU and VCONDEQ optab internal functions.
The expansion of STMT happens based on OPTAB table associated. */
@@ -3890,6 +3915,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
#define direct_mask_load_lanes_optab_supported_p multi_vector_optab_supported_p
#define direct_gather_load_optab_supported_p convert_optab_supported_p
#define direct_len_load_optab_supported_p direct_optab_supported_p
+#define direct_len_maskload_optab_supported_p convert_optab_supported_p
#define direct_mask_store_optab_supported_p convert_optab_supported_p
#define direct_store_lanes_optab_supported_p multi_vector_optab_supported_p
#define direct_mask_store_lanes_optab_supported_p multi_vector_optab_supported_p
@@ -3897,6 +3923,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
#define direct_vec_cond_optab_supported_p convert_optab_supported_p
#define direct_scatter_store_optab_supported_p convert_optab_supported_p
#define direct_len_store_optab_supported_p direct_optab_supported_p
+#define direct_len_maskstore_optab_supported_p convert_optab_supported_p
#define direct_while_optab_supported_p convert_optab_supported_p
#define direct_fold_extract_optab_supported_p direct_optab_supported_p
#define direct_fold_left_optab_supported_p direct_optab_supported_p
@@ -4361,6 +4388,7 @@ internal_load_fn_p (internal_fn fn)
case IFN_GATHER_LOAD:
case IFN_MASK_GATHER_LOAD:
case IFN_LEN_LOAD:
+ case IFN_LEN_MASK_LOAD:
return true;
default:
@@ -4381,6 +4409,7 @@ internal_store_fn_p (internal_fn fn)
case IFN_SCATTER_STORE:
case IFN_MASK_SCATTER_STORE:
case IFN_LEN_STORE:
+ case IFN_LEN_MASK_STORE:
return true;
default:
@@ -4420,6 +4449,10 @@ internal_fn_mask_index (internal_fn fn)
case IFN_MASK_STORE_LANES:
return 2;
+ case IFN_LEN_MASK_LOAD:
+ case IFN_LEN_MASK_STORE:
+ return 3;
+
case IFN_MASK_GATHER_LOAD:
case IFN_MASK_SCATTER_STORE:
return 4;
@@ -4444,6 +4477,8 @@ internal_fn_stored_value_index (internal_fn fn)
case IFN_MASK_SCATTER_STORE:
case IFN_LEN_STORE:
return 3;
+ case IFN_LEN_MASK_STORE:
+ return 4;
default:
return -1;
@@ -50,12 +50,14 @@ along with GCC; see the file COPYING3. If not see
- mask_load_lanes: currently just vec_mask_load_lanes
- gather_load: used for {mask_,}gather_load
- len_load: currently just len_load
+ - len_maskload: currently just len_maskload
- mask_store: currently just maskstore
- store_lanes: currently just vec_store_lanes
- mask_store_lanes: currently just vec_mask_store_lanes
- scatter_store: used for {mask_,}scatter_store
- len_store: currently just len_store
+ - len_maskstore: currently just len_maskstore
- unary: a normal unary optab, such as vec_reverse_<mode>
- binary: a normal binary optab, such as vec_interleave_lo_<mode>
@@ -157,6 +159,7 @@ DEF_INTERNAL_OPTAB_FN (MASK_GATHER_LOAD, ECF_PURE,
mask_gather_load, gather_load)
DEF_INTERNAL_OPTAB_FN (LEN_LOAD, ECF_PURE, len_load, len_load)
+DEF_INTERNAL_OPTAB_FN (LEN_MASK_LOAD, ECF_PURE, len_maskload, len_maskload)
DEF_INTERNAL_OPTAB_FN (SCATTER_STORE, 0, scatter_store, scatter_store)
DEF_INTERNAL_OPTAB_FN (MASK_SCATTER_STORE, 0,
@@ -175,6 +178,7 @@ DEF_INTERNAL_OPTAB_FN (VCOND_MASK, 0, vcond_mask, vec_cond_mask)
DEF_INTERNAL_OPTAB_FN (VEC_SET, 0, vec_set, vec_set)
DEF_INTERNAL_OPTAB_FN (LEN_STORE, 0, len_store, len_store)
+DEF_INTERNAL_OPTAB_FN (LEN_MASK_STORE, 0, len_maskstore, len_maskstore)
DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while)
DEF_INTERNAL_OPTAB_FN (SELECT_VL, ECF_CONST | ECF_NOTHROW, select_vl, binary)
@@ -624,6 +624,45 @@ get_len_load_store_mode (machine_mode mode, bool is_load)
return opt_machine_mode ();
}
+/* Return true if the target supports length- and mask-predicated
+   load/store for vector mode MODE.  The length is used for loop control
+   and the mask for flow control.  */
+
+bool
+can_vec_len_mask_load_store_p (machine_mode mode, bool is_load)
+{
+ optab op = is_load ? len_maskload_optab : len_maskstore_optab;
+ machine_mode vmode;
+ machine_mode mask_mode;
+
+ /* If mode is vector mode, check it directly. */
+ if (VECTOR_MODE_P (mode))
+ return targetm.vectorize.get_mask_mode (mode).exists (&mask_mode)
+ && convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing;
+
+ scalar_mode smode;
+ if (is_a<scalar_mode> (mode, &smode))
+ /* See if there is any chance the mask load or store might be
+ vectorized. If not, punt. */
+ vmode = targetm.vectorize.preferred_simd_mode (smode);
+ else
+ vmode = mode;
+
+ if (VECTOR_MODE_P (vmode)
+ && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
+ && convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing)
+ return true;
+
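+  /* Otherwise see whether any of the autovectorization vector modes
+     derived from the scalar mode supports the optab.  */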
+ auto_vector_modes vector_modes;
+ targetm.vectorize.autovectorize_vector_modes (&vector_modes, true);
+ for (machine_mode base_mode : vector_modes)
+ if (related_vector_mode (base_mode, smode).exists (&vmode)
+ && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
+ && convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing)
+ return true;
+
+ return false;
+}
+
/* Return true if there is a compare_and_swap pattern. */
bool
@@ -189,6 +189,7 @@ enum insn_code find_widening_optab_handler_and_mode (optab, machine_mode,
int can_mult_highpart_p (machine_mode, bool);
bool can_vec_mask_load_store_p (machine_mode, machine_mode, bool);
opt_machine_mode get_len_load_store_mode (machine_mode, bool);
+bool can_vec_len_mask_load_store_p (machine_mode, bool);
bool can_compare_and_swap_p (machine_mode, bool);
bool can_atomic_exchange_p (machine_mode, bool);
bool can_atomic_load_p (machine_mode);
@@ -91,6 +91,8 @@ OPTAB_CD(vec_cmpu_optab, "vec_cmpu$a$b")
OPTAB_CD(vec_cmpeq_optab, "vec_cmpeq$a$b")
OPTAB_CD(maskload_optab, "maskload$a$b")
OPTAB_CD(maskstore_optab, "maskstore$a$b")
+OPTAB_CD(len_maskload_optab, "len_maskload$a$b")
+OPTAB_CD(len_maskstore_optab, "len_maskstore$a$b")
OPTAB_CD(gather_load_optab, "gather_load$a$b")
OPTAB_CD(mask_gather_load_optab, "mask_gather_load$a$b")
OPTAB_CD(scatter_store_optab, "scatter_store$a$b")
@@ -5816,6 +5816,8 @@ get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
}
case IFN_MASK_LOAD:
case IFN_MASK_STORE:
+ case IFN_LEN_MASK_LOAD:
+ case IFN_LEN_MASK_STORE:
break;
default:
clobbers_memory = true;
@@ -5861,11 +5863,13 @@ get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
switch (gimple_call_internal_fn (stmt))
{
case IFN_MASK_LOAD:
+ case IFN_LEN_MASK_LOAD:
if (gimple_call_lhs (stmt) == NULL_TREE)
break;
ref.is_read = true;
/* FALLTHRU */
case IFN_MASK_STORE:
+ case IFN_LEN_MASK_STORE:
ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
align = tree_to_shwi (gimple_call_arg (stmt, 1));
if (ref.is_read)
@@ -960,6 +960,9 @@ ifcvt_can_use_mask_load_store (gimple *stmt)
if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
return true;
+ if (can_vec_len_mask_load_store_p (mode, is_load))
+ return true;
+
return false;
}
@@ -2815,11 +2815,13 @@ ref_maybe_used_by_call_p_1 (gcall *call, ao_ref *ref, bool tbaa_p)
case IFN_SCATTER_STORE:
case IFN_MASK_SCATTER_STORE:
case IFN_LEN_STORE:
+ case IFN_LEN_MASK_STORE:
return false;
case IFN_MASK_STORE_LANES:
goto process_args;
case IFN_MASK_LOAD:
case IFN_LEN_LOAD:
+ case IFN_LEN_MASK_LOAD:
case IFN_MASK_LOAD_LANES:
{
ao_ref rhs_ref;
@@ -3065,6 +3067,7 @@ call_may_clobber_ref_p_1 (gcall *call, ao_ref *ref, bool tbaa_p)
return false;
case IFN_MASK_STORE:
case IFN_LEN_STORE:
+ case IFN_LEN_MASK_STORE:
case IFN_MASK_STORE_LANES:
{
tree rhs = gimple_call_arg (call,
@@ -174,6 +174,17 @@ initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write, bool may_def_ok = false)
return true;
}
break;
+ case IFN_LEN_MASK_STORE:
+ /* We cannot initialize a must-def ao_ref (in all cases) but we
+ can provide a may-def variant. */
+ if (may_def_ok)
+ {
+ ao_ref_init_from_ptr_and_size
+ (write, gimple_call_arg (stmt, 0),
+ TYPE_SIZE_UNIT (TREE_TYPE (gimple_call_arg (stmt, 4))));
+ return true;
+ }
+ break;
default:;
}
}
@@ -1483,6 +1494,7 @@ dse_optimize_stmt (function *fun, gimple_stmt_iterator *gsi, sbitmap live_bytes)
{
case IFN_LEN_STORE:
case IFN_MASK_STORE:
+ case IFN_LEN_MASK_STORE:
{
enum dse_store_status store_status;
store_status = dse_classify_store (&ref, stmt, false, live_bytes);
@@ -2439,6 +2439,7 @@ get_mem_type_for_internal_fn (gcall *call, tree *op_p)
case IFN_MASK_LOAD:
case IFN_MASK_LOAD_LANES:
case IFN_LEN_LOAD:
+ case IFN_LEN_MASK_LOAD:
if (op_p == gimple_call_arg_ptr (call, 0))
return TREE_TYPE (gimple_call_lhs (call));
return NULL_TREE;
@@ -2450,6 +2451,11 @@ get_mem_type_for_internal_fn (gcall *call, tree *op_p)
return TREE_TYPE (gimple_call_arg (call, 3));
return NULL_TREE;
+ case IFN_LEN_MASK_STORE:
+ if (op_p == gimple_call_arg_ptr (call, 0))
+ return TREE_TYPE (gimple_call_arg (call, 4));
+ return NULL_TREE;
+
default:
return NULL_TREE;
}
@@ -7555,6 +7561,8 @@ get_alias_ptr_type_for_ptr_address (iv_use *use)
case IFN_MASK_STORE_LANES:
case IFN_LEN_LOAD:
case IFN_LEN_STORE:
+ case IFN_LEN_MASK_LOAD:
+ case IFN_LEN_MASK_STORE:
/* The second argument contains the correct alias type. */
gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
return TREE_TYPE (gimple_call_arg (call, 1));
@@ -3304,6 +3304,12 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
if (!tree_fits_uhwi_p (len) || !tree_fits_shwi_p (bias))
return (void *)-1;
break;
+ case IFN_LEN_MASK_STORE:
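+	  /* Handle only a constant length and a constant mask, so that the
+	     bytes actually written are known exactly.  */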
+ len = gimple_call_arg (call, 2);
+ mask = gimple_call_arg (call, internal_fn_mask_index (fn));
+ if (!tree_fits_uhwi_p (len) || TREE_CODE (mask) != VECTOR_CST)
+ return (void *)-1;
+ break;
default:
return (void *)-1;
}
@@ -3039,7 +3039,8 @@ can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info,
if (!call2 || !gimple_call_internal_p (call2))
return false;
internal_fn ifn = gimple_call_internal_fn (call1);
- if (ifn != IFN_MASK_LOAD && ifn != IFN_MASK_STORE)
+ if (ifn != IFN_MASK_LOAD && ifn != IFN_MASK_STORE
+ && ifn != IFN_LEN_MASK_LOAD && ifn != IFN_LEN_MASK_STORE)
return false;
if (ifn != gimple_call_internal_fn (call2))
return false;
@@ -3049,7 +3050,8 @@ can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info,
tree mask1 = gimple_call_arg (call1, 2);
tree mask2 = gimple_call_arg (call2, 2);
if (!operand_equal_p (mask1, mask2, 0)
- && (ifn == IFN_MASK_STORE || !allow_slp_p))
+ && (ifn == IFN_MASK_STORE || ifn == IFN_LEN_MASK_STORE
+ || !allow_slp_p))
{
mask1 = strip_conversion (mask1);
if (!mask1)
@@ -4292,7 +4294,9 @@ vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
if (gcall *call = dyn_cast <gcall *> (stmt))
if (!gimple_call_internal_p (call)
|| (gimple_call_internal_fn (call) != IFN_MASK_LOAD
- && gimple_call_internal_fn (call) != IFN_MASK_STORE))
+ && gimple_call_internal_fn (call) != IFN_MASK_STORE
+ && gimple_call_internal_fn (call) != IFN_LEN_MASK_LOAD
+ && gimple_call_internal_fn (call) != IFN_LEN_MASK_STORE))
{
free_data_ref (dr);
return opt_result::failure_at (stmt,
@@ -6731,7 +6735,9 @@ vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
if (gimple_call_internal_p (stmt)
&& (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
- || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
+ || gimple_call_internal_fn (stmt) == IFN_MASK_STORE
+ || gimple_call_internal_fn (stmt) == IFN_LEN_MASK_LOAD
+ || gimple_call_internal_fn (stmt) == IFN_LEN_MASK_STORE))
return dr_unaligned_supported;
if (loop_vinfo)
@@ -1296,30 +1296,33 @@ vect_verify_loop_lens (loop_vec_info loop_vinfo)
if (LOOP_VINFO_LENS (loop_vinfo).is_empty ())
return false;
- machine_mode len_load_mode = get_len_load_store_mode
- (loop_vinfo->vector_mode, true).require ();
- machine_mode len_store_mode = get_len_load_store_mode
- (loop_vinfo->vector_mode, false).require ();
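+  /* The partial-vector bias below applies only to LEN_LOAD/LEN_STORE;
+     targets providing LEN_MASK_LOAD/LEN_MASK_STORE skip these checks.  */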
+ if (!can_vec_len_mask_load_store_p (loop_vinfo->vector_mode, true)
+ && !can_vec_len_mask_load_store_p (loop_vinfo->vector_mode, false))
+ {
+ machine_mode len_load_mode
+ = get_len_load_store_mode (loop_vinfo->vector_mode, true).require ();
+ machine_mode len_store_mode
+ = get_len_load_store_mode (loop_vinfo->vector_mode, false).require ();
- signed char partial_load_bias = internal_len_load_store_bias
- (IFN_LEN_LOAD, len_load_mode);
+ signed char partial_load_bias
+ = internal_len_load_store_bias (IFN_LEN_LOAD, len_load_mode);
- signed char partial_store_bias = internal_len_load_store_bias
- (IFN_LEN_STORE, len_store_mode);
+ signed char partial_store_bias
+ = internal_len_load_store_bias (IFN_LEN_STORE, len_store_mode);
- gcc_assert (partial_load_bias == partial_store_bias);
+ gcc_assert (partial_load_bias == partial_store_bias);
- if (partial_load_bias == VECT_PARTIAL_BIAS_UNSUPPORTED)
- return false;
+ if (partial_load_bias == VECT_PARTIAL_BIAS_UNSUPPORTED)
+ return false;
- /* If the backend requires a bias of -1 for LEN_LOAD, we must not emit
- len_loads with a length of zero. In order to avoid that we prohibit
- more than one loop length here. */
- if (partial_load_bias == -1
- && LOOP_VINFO_LENS (loop_vinfo).length () > 1)
- return false;
+ /* If the backend requires a bias of -1 for LEN_LOAD, we must not emit
+ len_loads with a length of zero. In order to avoid that we prohibit
+ more than one loop length here. */
+ if (partial_load_bias == -1 && LOOP_VINFO_LENS (loop_vinfo).length () > 1)
+ return false;
- LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo) = partial_load_bias;
+ LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo) = partial_load_bias;
+ }
unsigned int max_nitems_per_iter = 1;
unsigned int i;
@@ -11317,7 +11320,8 @@ optimize_mask_stores (class loop *loop)
gsi_next (&gsi))
{
stmt = gsi_stmt (gsi);
- if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
+ if (gimple_call_internal_p (stmt, IFN_MASK_STORE)
+ || gimple_call_internal_p (stmt, IFN_LEN_MASK_STORE))
worklist.safe_push (stmt);
}
}
@@ -489,6 +489,7 @@ static const int cond_expr_maps[3][5] = {
};
static const int arg1_map[] = { 1, 1 };
static const int arg2_map[] = { 1, 2 };
+static const int arg3_map[] = { 1, 3 };
static const int arg1_arg4_map[] = { 2, 1, 4 };
static const int op1_op0_map[] = { 2, 1, 0 };
@@ -524,6 +525,9 @@ vect_get_operand_map (const gimple *stmt, unsigned char swap = 0)
case IFN_MASK_LOAD:
return arg2_map;
+ case IFN_LEN_MASK_LOAD:
+ return arg3_map;
+
case IFN_GATHER_LOAD:
return arg1_map;
@@ -1779,6 +1783,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
{
if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)
+ || gimple_call_internal_p (stmt, IFN_LEN_MASK_LOAD)
|| gimple_call_internal_p (stmt, IFN_GATHER_LOAD)
|| gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD));
else
@@ -1837,6 +1837,15 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
using_partial_vectors_p = true;
}
+ if (can_vec_len_mask_load_store_p (vecmode, is_load))
+ {
+ nvectors = group_memory_nvectors (group_size * vf, nunits);
+      /* The length is used for loop control and the mask for flow control.  */
+ vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
+ vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
+ using_partial_vectors_p = true;
+ }
+
if (!using_partial_vectors_p)
{
if (dump_enabled_p ())
@@ -7978,8 +7987,9 @@ vectorizable_store (vec_info *vinfo,
if (memory_access_type == VMAT_CONTIGUOUS)
{
if (!VECTOR_MODE_P (vec_mode)
- || !can_vec_mask_load_store_p (vec_mode,
- TYPE_MODE (mask_vectype), false))
+ || (!can_vec_mask_load_store_p (vec_mode,
+ TYPE_MODE (mask_vectype), false)
+ && !can_vec_len_mask_load_store_p (vec_mode, false)))
return false;
}
else if (memory_access_type != VMAT_LOAD_STORE_LANES
@@ -8942,7 +8952,38 @@ vectorizable_store (vec_info *vinfo,
}
/* Arguments are ready. Create the new vector stmt. */
- if (final_mask)
+ if (can_vec_len_mask_load_store_p (TYPE_MODE (vectype), false)
+ && (final_mask || loop_lens))
+ {
+ tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
+ poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
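+		  /* No conditional mask: build an all-ones mask so that only
+		     the length limits the store.  */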
+ if (!final_mask)
+ {
+ machine_mode mask_mode
+ = targetm.vectorize.get_mask_mode (TYPE_MODE (vectype))
+ .require ();
+ mask_vectype
+ = build_truth_vector_type_for_mode (nunits, mask_mode);
+ tree mask = build_int_cst (TREE_TYPE (mask_vectype), -1);
+ final_mask = build_vector_from_val (mask_vectype, mask);
+ }
+ tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
+ tree final_len;
+ if (loop_lens)
+ final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
+ vec_num * ncopies, vectype,
+ vec_num * j + i, 1);
+ else
+ final_len = build_int_cst (iv_type, nunits);
+ gcall *call
+ = gimple_build_call_internal (IFN_LEN_MASK_STORE, 5,
+ dataref_ptr, ptr, final_len,
+ final_mask, vec_oprnd);
+ gimple_call_set_nothrow (call, true);
+ vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
+ new_stmt = call;
+ }
+ else if (final_mask)
{
tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
gcall *call
@@ -9407,8 +9448,9 @@ vectorizable_load (vec_info *vinfo,
{
machine_mode vec_mode = TYPE_MODE (vectype);
if (!VECTOR_MODE_P (vec_mode)
- || !can_vec_mask_load_store_p (vec_mode,
- TYPE_MODE (mask_vectype), true))
+ || (!can_vec_mask_load_store_p (vec_mode,
+ TYPE_MODE (mask_vectype), true)
+ && !can_vec_len_mask_load_store_p (vec_mode, true)))
return false;
}
else if (memory_access_type != VMAT_LOAD_STORE_LANES
@@ -10301,7 +10343,47 @@ vectorizable_load (vec_info *vinfo,
align, misalign);
align = least_bit_hwi (misalign | align);
- if (final_mask)
+ if (can_vec_len_mask_load_store_p (TYPE_MODE (vectype),
+ true)
+ && (final_mask || loop_lens)
+ && memory_access_type != VMAT_INVARIANT)
+ {
+ tree ptr
+ = build_int_cst (ref_type, align * BITS_PER_UNIT);
+ poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
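+		      /* No conditional mask: build an all-ones mask so that
+			 only the length limits the load.  */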
+ if (!final_mask)
+ {
+ machine_mode mask_mode
+ = targetm.vectorize
+ .get_mask_mode (TYPE_MODE (vectype))
+ .require ();
+ mask_vectype
+ = build_truth_vector_type_for_mode (nunits,
+ mask_mode);
+ tree mask
+ = build_int_cst (TREE_TYPE (mask_vectype), -1);
+ final_mask
+ = build_vector_from_val (mask_vectype, mask);
+ }
+ tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
+ tree final_len;
+ if (loop_lens)
+ final_len
+ = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
+ vec_num * ncopies, vectype,
+ vec_num * j + i, 1);
+ else
+ final_len = build_int_cst (iv_type, nunits);
+
+ gcall *call
+ = gimple_build_call_internal (IFN_LEN_MASK_LOAD, 4,
+ dataref_ptr, ptr,
+ final_len, final_mask);
+ gimple_call_set_nothrow (call, true);
+ new_stmt = call;
+ data_ref = NULL_TREE;
+ }
+ else if (final_mask)
{
tree ptr = build_int_cst (ref_type,
align * BITS_PER_UNIT);
@@ -13027,7 +13109,8 @@ vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
if (gimple_get_lhs (stmt) == NULL_TREE
/* MASK_STORE has no lhs, but is ok. */
- && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
+ && !gimple_call_internal_p (stmt, IFN_MASK_STORE)
+ && !gimple_call_internal_p (stmt, IFN_LEN_MASK_STORE))
{
if (is_a <gcall *> (stmt))
{
@@ -13071,6 +13154,8 @@ vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
scalar_type = TREE_TYPE (DR_REF (dr));
else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
+ else if (gimple_call_internal_p (stmt, IFN_LEN_MASK_STORE))
+ scalar_type = TREE_TYPE (gimple_call_arg (stmt, 4));
else
scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
@@ -1101,6 +1101,8 @@ try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
{
internal_fn ifn = gimple_call_internal_fn (call);
if (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE
+ || ifn == IFN_LEN_MASK_LOAD
+ || ifn == IFN_LEN_MASK_STORE
/* Don't keep the if-converted parts when the ifn with
specific type is not supported by the backend. */
|| (direct_internal_fn_p (ifn)