@@ -38,6 +38,9 @@ along with GCC; see the file COPYING3. If not see
#include "expr.h"
#include "langhooks.h"
#include "tm_p.h"
+#include "backend.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
/* Macros to create an enumeration identifier for a function prototype. */
#define RISCV_FTYPE_NAME0(A) RISCV_##A##_FTYPE
@@ -332,6 +335,34 @@ riscv_expand_builtin_direct (enum insn_code icode, rtx target, tree exp,
return riscv_expand_builtin_insn (icode, opno, ops, has_target_p);
}
+/* Implement TARGET_GIMPLE_FOLD_BUILTIN. */
+
+bool
+riscv_gimple_fold_builtin (gimple_stmt_iterator *gsi)
+{
+ gcall *stmt = as_a<gcall *> (gsi_stmt (*gsi));
+ tree fndecl = gimple_call_fndecl (stmt);
+ unsigned int code = DECL_MD_FUNCTION_CODE (fndecl);
+ unsigned int subcode = code >> RISCV_BUILTIN_SHIFT;
+ gimple *new_stmt = NULL;
+ switch (code & RISCV_BUILTIN_CLASS)
+ {
+ case RISCV_BUILTIN_GENERAL:
+ new_stmt = NULL;
+ break;
+
+ case RISCV_BUILTIN_VECTOR:
+ new_stmt = riscv_vector::gimple_fold_builtin (subcode, gsi, stmt);
+ break;
+ }
+
+ if (!new_stmt)
+ return false;
+
+ gsi_replace (gsi, new_stmt, false);
+ return true;
+}
+
/* Implement TARGET_EXPAND_BUILTIN. */
rtx
@@ -85,6 +85,7 @@ void riscv_register_pragmas (void);
/* Routines implemented in riscv-builtins.cc. */
extern void riscv_atomic_assign_expand_fenv (tree *, tree *, tree *);
+extern bool riscv_gimple_fold_builtin (gimple_stmt_iterator *);
extern rtx riscv_expand_builtin (tree, rtx, rtx, machine_mode, int);
extern tree riscv_builtin_decl (unsigned int, bool);
extern void riscv_init_builtins (void);
@@ -150,6 +151,7 @@ bool verify_type_context (location_t, type_context_kind, const_tree, bool);
#endif
void handle_pragma_vector (void);
tree builtin_decl (unsigned, bool);
+gimple *gimple_fold_builtin (unsigned int, gimple_stmt_iterator *, gcall *);
rtx expand_builtin (unsigned int, tree, rtx);
bool check_builtin_call (location_t, vec<location_t>, unsigned int,
tree, unsigned int, tree *);
@@ -1529,13 +1529,93 @@ public:
{
rtx src = expand_normal (CALL_EXPR_ARG (e.exp, 0));
rtx index = expand_normal (CALL_EXPR_ARG (e.exp, 1));
- poly_int64 offset = INTVAL (index) * GET_MODE_SIZE (GET_MODE (src));
+ poly_int64 offset = INTVAL (index) * GET_MODE_SIZE (GET_MODE (e.target));
rtx subreg
= simplify_gen_subreg (GET_MODE (e.target), src, GET_MODE (src), offset);
return subreg;
}
};
+class read_vl : public function_base
+{
+public:
+ unsigned int call_properties (const function_instance &) const override
+ {
+ return CP_READ_CSR;
+ }
+
+ rtx expand (function_expander &e) const override
+ {
+ if (Pmode == SImode)
+ emit_insn (gen_read_vlsi (e.target));
+ else
+ emit_insn (gen_read_vldi_zero_extend (e.target));
+ return e.target;
+ }
+};
+
+class vleff : public function_base
+{
+public:
+ unsigned int call_properties (const function_instance &) const override
+ {
+ return CP_READ_MEMORY | CP_WRITE_CSR;
+ }
+
+ gimple *fold (gimple_folder &f) const override
+ {
+ /* fold vleff (const *base, size_t *new_vl, size_t vl)
+
+ ====> vleff (const *base, size_t vl)
+ new_vl = MEM_REF[read_vl ()]. */
+
+ auto_vec<tree, 8> vargs;
+
+ for (unsigned i = 0; i < gimple_call_num_args (f.call); i++)
+ {
+ /* Exclude size_t *new_vl argument. */
+ if (i == gimple_call_num_args (f.call) - 2)
+ continue;
+
+ vargs.quick_push (gimple_call_arg (f.call, i));
+ }
+
+ gimple *repl = gimple_build_call_vec (gimple_call_fn (f.call), vargs);
+ gimple_call_set_lhs (repl, f.lhs);
+
+ /* Handle size_t *new_vl by read_vl. */
+ tree new_vl = gimple_call_arg (f.call, gimple_call_num_args (f.call) - 2);
+ if (integer_zerop (new_vl))
+ {
+ /* This case happens when user passes the nullptr to new_vl argument.
+ In this case, we just need to ignore the new_vl argument and return
+ vleff instruction directly. */
+ return repl;
+ }
+
+ tree tmp_var = create_tmp_var (size_type_node, "new_vl");
+ tree decl = get_read_vl_decl ();
+ gimple *g = gimple_build_call (decl, 0);
+ gimple_call_set_lhs (g, tmp_var);
+ tree indirect
+ = fold_build2 (MEM_REF, size_type_node,
+ gimple_call_arg (f.call,
+ gimple_call_num_args (f.call) - 2),
+ build_int_cst (build_pointer_type (size_type_node), 0));
+ gassign *assign = gimple_build_assign (indirect, tmp_var);
+
+ gsi_insert_after (f.gsi, assign, GSI_SAME_STMT);
+ gsi_insert_after (f.gsi, g, GSI_SAME_STMT);
+ return repl;
+ }
+
+ rtx expand (function_expander &e) const override
+ {
+ return e.use_contiguous_load_insn (
+ code_for_pred_fault_load (e.vector_mode ()));
+ }
+};
+
static CONSTEXPR const vsetvl<false> vsetvl_obj;
static CONSTEXPR const vsetvl<true> vsetvlmax_obj;
static CONSTEXPR const loadstore<false, LST_UNIT_STRIDE, false> vle_obj;
@@ -1744,6 +1824,8 @@ static CONSTEXPR const vlmul_ext vlmul_ext_obj;
static CONSTEXPR const vlmul_trunc vlmul_trunc_obj;
static CONSTEXPR const vset vset_obj;
static CONSTEXPR const vget vget_obj;
+static CONSTEXPR const read_vl read_vl_obj;
+static CONSTEXPR const vleff vleff_obj;
/* Declare the function base NAME, pointing it to an instance
of class <NAME>_obj. */
@@ -1958,5 +2040,7 @@ BASE (vlmul_ext)
BASE (vlmul_trunc)
BASE (vset)
BASE (vget)
+BASE (read_vl)
+BASE (vleff)
} // end namespace riscv_vector
@@ -238,6 +238,8 @@ extern const function_base *const vlmul_ext;
extern const function_base *const vlmul_trunc;
extern const function_base *const vset;
extern const function_base *const vget;
+extern const function_base *const read_vl;
+extern const function_base *const vleff;
}
} // end namespace riscv_vector
@@ -36,6 +36,9 @@ along with GCC; see the file COPYING3. If not see
#define DEF_RVV_FUNCTION(NAME, SHAPE, PREDS, OPS_INFO)
#endif
+/* Internal helper functions for gimple fold use. */
+DEF_RVV_FUNCTION (read_vl, read_vl, none_preds, p_none_void_ops)
+
/* 6. Configuration-Setting Instructions. */
DEF_RVV_FUNCTION (vsetvl, vsetvl, none_preds, i_none_size_size_ops)
@@ -71,7 +74,9 @@ DEF_RVV_FUNCTION (vsoxei16, indexed_loadstore, none_m_preds, all_v_scalar_ptr_ee
DEF_RVV_FUNCTION (vsoxei32, indexed_loadstore, none_m_preds, all_v_scalar_ptr_eew32_index_ops)
DEF_RVV_FUNCTION (vsoxei64, indexed_loadstore, none_m_preds, all_v_scalar_ptr_eew64_index_ops)
-// TODO: 7.7. Unit-stride Fault-Only-First Loads
+// 7.7. Unit-stride Fault-Only-First Loads
+DEF_RVV_FUNCTION (vleff, fault_load, full_preds, all_v_scalar_const_ptr_size_ptr_ops)
+
// TODO: 7.8. Vector Load/Store Segment Instructions
/* 11. Vector Integer Arithmetic Instructions. */
@@ -497,6 +497,62 @@ struct vget_def : public misc_def
}
};
+/* read_vl_def class. */
+struct read_vl_def : public function_shape
+{
+ void build (function_builder &b,
+ const function_group_info &group) const override
+ {
+ auto_vec<tree> argument_types;
+ b.add_unique_function (get_read_vl_instance (), (*group.shape),
+ size_type_node, argument_types);
+ }
+
+ char *get_name (function_builder &b, const function_instance &instance,
+ bool overloaded_p) const override
+ {
+ if (overloaded_p)
+ return nullptr;
+ b.append_base_name (instance.base_name);
+ return b.finish_name ();
+ }
+};
+
+/* fault_load_def class. */
+struct fault_load_def : public build_base
+{
+ char *get_name (function_builder &b, const function_instance &instance,
+ bool overloaded_p) const override
+ {
+ if (overloaded_p)
+ if (instance.pred == PRED_TYPE_none || instance.pred == PRED_TYPE_mu)
+ return nullptr;
+ tree type = builtin_types[instance.type.index].vector;
+ machine_mode mode = TYPE_MODE (type);
+ int sew = GET_MODE_BITSIZE (GET_MODE_INNER (mode));
+ b.append_name ("__riscv_");
+ b.append_name ("vle");
+ b.append_sew (sew);
+ b.append_name ("ff");
+
+ /* vop<sew>_v --> vop<sew>_v_<type>. */
+ if (!overloaded_p)
+ {
+ /* vop<sew> --> vop<sew>_v. */
+ b.append_name (operand_suffixes[instance.op_info->op]);
+ /* vop<sew>_v --> vop<sew>_v_<type>. */
+ b.append_name (type_suffixes[instance.type.index].vector);
+ }
+
+ /* According to rvv-intrinsic-doc, it does not add "_m" suffix
+ for vop_m C++ overloaded API. */
+ if (overloaded_p && instance.pred == PRED_TYPE_m)
+ return b.finish_name ();
+ b.append_name (predication_suffixes[instance.pred]);
+ return b.finish_name ();
+ }
+};
+
SHAPE(vsetvl, vsetvl)
SHAPE(vsetvl, vsetvlmax)
SHAPE(loadstore, loadstore)
@@ -514,5 +570,7 @@ SHAPE(vundefined, vundefined)
SHAPE(misc, misc)
SHAPE(vset, vset)
SHAPE(vget, vget)
+SHAPE(read_vl, read_vl)
+SHAPE(fault_load, fault_load)
} // end namespace riscv_vector
@@ -41,6 +41,8 @@ extern const function_shape *const vundefined;
extern const function_shape *const misc;
extern const function_shape *const vset;
extern const function_shape *const vget;
+extern const function_shape *const read_vl;
+extern const function_shape *const fault_load;
}
} // end namespace riscv_vector
@@ -45,6 +45,9 @@
#include "targhooks.h"
#include "regs.h"
#include "emit-rtl.h"
+#include "basic-block.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
#include "riscv-vector-builtins.h"
#include "riscv-vector-builtins-shapes.h"
#include "riscv-vector-builtins-bases.h"
@@ -118,6 +121,9 @@ const char *const predication_suffixes[NUM_PRED_TYPES] = {
#include "riscv-vector-builtins.def"
};
+/* The empty (none) type list, for intrinsics such as read_vl that are not
+   registered against any vector type. */
+static const rvv_type_info none_ops[] = {{NUM_VECTOR_TYPES, 0}};
+
/* A list of all signed integer will be registered for intrinsic functions. */
static const rvv_type_info i_ops[] = {
#define DEF_RVV_I_OPS(TYPE, REQUIRE) {VECTOR_TYPE_##TYPE, REQUIRE},
@@ -384,6 +390,12 @@ static CONSTEXPR const rvv_arg_type_info size_args[]
static CONSTEXPR const rvv_arg_type_info scalar_const_ptr_args[]
= {rvv_arg_type_info (RVV_BASE_scalar_const_ptr), rvv_arg_type_info_end};
+/* A list of args for vector_type func (const scalar_type *, size_t *) function.
+ */
+static CONSTEXPR const rvv_arg_type_info scalar_const_ptr_size_ptr_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar_const_ptr),
+ rvv_arg_type_info (RVV_BASE_size_ptr), rvv_arg_type_info_end};
+
/* A list of args for void func (scalar_type *, vector_type) function. */
static CONSTEXPR const rvv_arg_type_info scalar_ptr_args[]
= {rvv_arg_type_info (RVV_BASE_scalar_ptr),
@@ -797,6 +809,14 @@ static CONSTEXPR const rvv_op_info all_v_scalar_const_ptr_ops
rvv_arg_type_info (RVV_BASE_vector), /* Return type */
scalar_const_ptr_args /* Args */};
+/* A static operand information for vector_type func (const scalar_type *,
+ * size_t *) function registration. */
+static CONSTEXPR const rvv_op_info all_v_scalar_const_ptr_size_ptr_ops
+ = {all_ops, /* Types */
+ OP_TYPE_v, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ scalar_const_ptr_size_ptr_args /* Args */};
+
/* A static operand information for void func (scalar_type *, vector_type)
* function registration. */
static CONSTEXPR const rvv_op_info all_v_scalar_ptr_ops
@@ -2103,6 +2123,13 @@ static CONSTEXPR const rvv_op_info all_v_vget_lmul4_x2_ops
rvv_arg_type_info (RVV_BASE_vector), /* Return type */
ext_x2_vget_args /* Args */};
+/* A static operand information for size_t func () function registration. */
+static CONSTEXPR const rvv_op_info p_none_void_ops
+ = {none_ops, /* Types */
+ OP_TYPE_none, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_size), /* Return type */
+ void_args /* Args */};
+
/* A list of all RVV base function types. */
static CONSTEXPR const function_type_info function_types[] = {
#define DEF_RVV_TYPE_INDEX(VECTOR, MASK, SIGNED, UNSIGNED, EEW8_INDEX, EEW16_INDEX, \
@@ -2155,6 +2182,7 @@ static CONSTEXPR const function_type_info function_types[] = {
VECTOR_TYPE_##X16_VLMUL_EXT, \
VECTOR_TYPE_##X32_VLMUL_EXT, \
VECTOR_TYPE_##X64_VLMUL_EXT, \
+ VECTOR_TYPE_INVALID, \
},
#include "riscv-vector-builtins.def"
}; // namespace riscv_vector
@@ -2504,7 +2532,7 @@ rvv_arg_type_info::get_tree_type (vector_type_index type_idx) const
satisfy the require extension of the type. For example,
vfloat32m1_t require floating-point extension. In this case,
just return NULL_TREE. */
- if (!builtin_types[type_idx].vector)
+ if (type_idx != VECTOR_TYPE_INVALID && !builtin_types[type_idx].vector)
return NULL_TREE;
switch (base_type)
@@ -2857,6 +2885,32 @@ function_call_info::function_call_info (location_t location_in,
: function_instance (instance_in), location (location_in), fndecl (fndecl_in)
{}
+gimple_folder::gimple_folder (const function_instance &instance, tree fndecl,
+ gimple_stmt_iterator *gsi_in, gcall *call_in)
+ : function_call_info (gimple_location (call_in), instance, fndecl),
+ gsi (gsi_in), call (call_in), lhs (gimple_call_lhs (call_in))
+{
+}
+
+/* Try to fold the call. Return the new statement on success and null
+ on failure. */
+gimple *
+gimple_folder::fold ()
+{
+ /* Don't fold anything when RVV is disabled; emit an error during
+ expansion instead. */
+ if (!TARGET_VECTOR)
+ return NULL;
+
+ /* Punt if the function has a return type and no result location is
+ provided. The attributes should allow target-independent code to
+ remove the calls if appropriate. */
+ if (!lhs && TREE_TYPE (gimple_call_fntype (call)) != void_type_node)
+ return NULL;
+
+ return base->fold (*this);
+}
+
function_expander::function_expander (const function_instance &instance,
tree fndecl_in, tree exp_in,
rtx target_in)
@@ -3429,6 +3483,16 @@ builtin_decl (unsigned int code, bool)
return (*registered_functions)[code]->decl;
}
+/* Attempt to fold STMT, given that it's a call to the RVV function
+ with subcode CODE. Return the new statement on success and null
+ on failure. Insert any other new statements at GSI. */
+gimple *
+gimple_fold_builtin (unsigned int code, gimple_stmt_iterator *gsi, gcall *stmt)
+{
+ registered_function &rfn = *(*registered_functions)[code];
+ return gimple_folder (rfn.instance, rfn.decl, gsi, stmt).fold ();
+}
+
/* Expand a call to the RVV function with subcode CODE. EXP is the call
expression and TARGET is the preferred location for the result.
Return the value of the lhs. */
@@ -3455,6 +3519,23 @@ check_builtin_call (location_t location, vec<location_t>, unsigned int code,
TREE_TYPE (rfn.decl), nargs, args).check ();
}
+function_instance
+get_read_vl_instance (void)
+{
+ return function_instance ("read_vl", bases::read_vl, shapes::read_vl,
+ none_ops[0], PRED_TYPE_none, &p_none_void_ops);
+}
+
+tree
+get_read_vl_decl (void)
+{
+ function_instance instance = get_read_vl_instance ();
+ hashval_t hash = instance.hash ();
+ registered_function *rfn = function_table->find_with_hash (instance, hash);
+ gcc_assert (rfn);
+ return rfn->decl;
+}
+
} // end namespace riscv_vector
inline void
@@ -375,6 +375,7 @@ DEF_RVV_BASE_TYPE (vlmul_ext_x8, get_vector_type (type_idx))
DEF_RVV_BASE_TYPE (vlmul_ext_x16, get_vector_type (type_idx))
DEF_RVV_BASE_TYPE (vlmul_ext_x32, get_vector_type (type_idx))
DEF_RVV_BASE_TYPE (vlmul_ext_x64, get_vector_type (type_idx))
+DEF_RVV_BASE_TYPE (size_ptr, build_pointer_type (size_type_node))
#include "riscv-vector-type-indexer.gen.def"
@@ -315,6 +315,25 @@ function_call_info::function_returns_void_p ()
return TREE_TYPE (TREE_TYPE (fndecl)) == void_type_node;
}
+/* A class for folding a gimple function call. */
+class gimple_folder : public function_call_info
+{
+public:
+ gimple_folder (const function_instance &, tree, gimple_stmt_iterator *,
+ gcall *);
+
+ gimple *fold ();
+
+ /* Where to insert extra statements that feed the final replacement. */
+ gimple_stmt_iterator *gsi;
+
+ /* The call we're folding. */
+ gcall *call;
+
+ /* The result of the call, or null if none. */
+ tree lhs;
+};
+
/* A class for expanding a function call into RTL. */
class function_expander : public function_call_info
{
@@ -390,6 +409,10 @@ public:
/* Return true if intrinsics has merge operand. */
virtual bool has_merge_operand_p () const;
+ /* Try to fold the given gimple call. Return the new gimple statement
+ on success, otherwise return null. */
+ virtual gimple *fold (gimple_folder &) const { return NULL; }
+
/* Expand the given call into rtl. Return the result of the function,
or an arbitrary value if the function doesn't return a result. */
virtual rtx expand (function_expander &) const = 0;
@@ -456,6 +479,8 @@ extern const char *const operand_suffixes[NUM_OP_TYPES];
extern const rvv_builtin_suffixes type_suffixes[NUM_VECTOR_TYPES + 1];
extern const char *const predication_suffixes[NUM_PRED_TYPES];
extern rvv_builtin_types_t builtin_types[NUM_VECTOR_TYPES + 1];
+extern function_instance get_read_vl_instance (void);
+extern tree get_read_vl_decl (void);
inline tree
rvv_arg_type_info::get_scalar_type (vector_type_index type_idx) const
@@ -106,8 +106,6 @@ using namespace riscv_vector;
static CONSTEXPR const unsigned ALL_SEW[] = {8, 16, 32, 64};
static CONSTEXPR const vlmul_type ALL_LMUL[]
= {LMUL_1, LMUL_2, LMUL_4, LMUL_8, LMUL_F8, LMUL_F4, LMUL_F2};
-static CONSTEXPR const demand_type SEW_LMUL_RELATED_DEMAND[]
- = {DEMAND_SEW, DEMAND_LMUL, DEMAND_RATIO, DEMAND_GE_SEW};
DEBUG_FUNCTION void
debug (const vector_insn_info *info)
@@ -198,6 +196,20 @@ scalar_move_insn_p (rtx_insn *rinsn)
|| get_attr_type (rinsn) == TYPE_VFMOVFV;
}
+/* Return true if the instruction is a fault-only-first load instruction. */
+static bool
+fault_first_load_p (rtx_insn *rinsn)
+{
+ return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_VLDFF;
+}
+
+/* Return true if the instruction is a read_vl instruction. */
+static bool
+read_vl_insn_p (rtx_insn *rinsn)
+{
+ return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_RDVL;
+}
+
/* Return true if it is a vsetvl instruction. */
static bool
vector_config_insn_p (rtx_insn *rinsn)
@@ -386,6 +398,8 @@ available_occurrence_p (const bb_info *bb, const vector_insn_info dem)
for (const insn_info *i = insn; real_insn_and_same_bb_p (i, bb);
i = i->next_nondebug_insn ())
{
+ if (read_vl_insn_p (i->rtl ()))
+ continue;
/* rs1 (avl) are not modified by following statements in
the basic block. */
if (find_access (i->defs (), REGNO (dem.get_avl ())))
@@ -400,72 +414,6 @@ available_occurrence_p (const bb_info *bb, const vector_insn_info dem)
return true;
}
-/* Return true if the block is worthwhile backward propagation. */
-static bool
-backward_propagate_worthwhile_p (const basic_block cfg_bb,
- const vector_block_info block_info)
-{
- if (loop_basic_block_p (cfg_bb))
- {
- if (block_info.reaching_out.valid_or_dirty_p ())
- {
- if (block_info.local_dem.compatible_p (block_info.reaching_out))
- {
- /* Case 1 (Can backward propagate):
- ....
- bb0:
- ...
- for (int i = 0; i < n; i++)
- {
- vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7);
- __riscv_vse16_v_i16mf4 (out + i + 5, v, 7);
- }
- The local_dem is compatible with reaching_out. Such case is
- worthwhile backward propagation. */
- return true;
- }
- else
- {
- /* Case 2 (Don't backward propagate):
- ....
- bb0:
- ...
- for (int i = 0; i < n; i++)
- {
- vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7);
- __riscv_vse16_v_i16mf4 (out + i + 5, v, 7);
- vint16mf2_t v2 = __riscv_vle16_v_i16mf2 (in + i + 6, 8);
- __riscv_vse16_v_i16mf2 (out + i + 6, v, 8);
- }
- The local_dem is incompatible with reaching_out.
- It makes no sense to backward propagate the local_dem since we
- can't avoid VSETVL inside the loop. */
- return false;
- }
- }
- else
- {
- gcc_assert (block_info.reaching_out.unknown_p ());
- /* Case 3 (Don't backward propagate):
- ....
- bb0:
- ...
- for (int i = 0; i < n; i++)
- {
- vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7);
- __riscv_vse16_v_i16mf4 (out + i + 5, v, 7);
- fn3 ();
- }
- The local_dem is VALID, but the reaching_out is UNKNOWN.
- It makes no sense to backward propagate the local_dem since we
- can't avoid VSETVL inside the loop. */
- return false;
- }
- }
-
- return true;
-}
-
static bool
insn_should_be_added_p (const insn_info *insn, unsigned int types)
{
@@ -688,15 +636,20 @@ static rtx
gen_vsetvl_pat (rtx_insn *rinsn, const vector_insn_info &info)
{
rtx new_pat;
+ vl_vtype_info new_info = info;
+ if (info.get_insn () && info.get_insn ()->rtl ()
+ && fault_first_load_p (info.get_insn ()->rtl ()))
+ new_info.set_avl_info (
+ avl_info (get_avl (info.get_insn ()->rtl ()), nullptr));
if (vsetvl_insn_p (rinsn) || vlmax_avl_p (info.get_avl ()))
{
rtx dest = get_vl (rinsn);
- new_pat = gen_vsetvl_pat (VSETVL_NORMAL, info, dest);
+ new_pat = gen_vsetvl_pat (VSETVL_NORMAL, new_info, dest);
}
else if (INSN_CODE (rinsn) == CODE_FOR_vsetvl_vtype_change_only)
- new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, info, NULL_RTX);
+ new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, new_info, NULL_RTX);
else
- new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, info, NULL_RTX);
+ new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, new_info, NULL_RTX);
return new_pat;
}
@@ -978,6 +931,36 @@ change_insn (rtx_insn *rinsn, rtx new_pat)
}
}
+static const insn_info *
+get_forward_read_vl_insn (const insn_info *insn)
+{
+ const bb_info *bb = insn->bb ();
+ for (const insn_info *i = insn->next_nondebug_insn ();
+ real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ())
+ {
+ if (find_access (i->defs (), VL_REGNUM))
+ return nullptr;
+ if (read_vl_insn_p (i->rtl ()))
+ return i;
+ }
+ return nullptr;
+}
+
+static const insn_info *
+get_backward_fault_first_load_insn (const insn_info *insn)
+{
+ const bb_info *bb = insn->bb ();
+ for (const insn_info *i = insn->prev_nondebug_insn ();
+ real_insn_and_same_bb_p (i, bb); i = i->prev_nondebug_insn ())
+ {
+ if (fault_first_load_p (i->rtl ()))
+ return i;
+ if (find_access (i->defs (), VL_REGNUM))
+ return nullptr;
+ }
+ return nullptr;
+}
+
static bool
change_insn (function_info *ssa, insn_change change, insn_info *insn,
rtx new_pat)
@@ -1083,6 +1066,12 @@ source_equal_p (insn_info *insn1, insn_info *insn2)
rtx note2 = find_reg_equal_equiv_note (rinsn2);
rtx single_set1 = single_set (rinsn1);
rtx single_set2 = single_set (rinsn2);
+ if (read_vl_insn_p (rinsn1) && read_vl_insn_p (rinsn2))
+ {
+ const insn_info *load1 = get_backward_fault_first_load_insn (insn1);
+ const insn_info *load2 = get_backward_fault_first_load_insn (insn2);
+ return load1 && load2 && load1 == load2;
+ }
if (note1 && note2 && rtx_equal_p (note1, note2))
return true;
@@ -1216,21 +1205,6 @@ possible_zero_avl_p (const vector_insn_info &info1,
return !info1.has_non_zero_avl () || !info2.has_non_zero_avl ();
}
-static bool
-first_ratio_invalid_for_second_sew_p (const vector_insn_info &info1,
- const vector_insn_info &info2)
-{
- return calculate_vlmul (info2.get_sew (), info1.get_ratio ())
- == LMUL_RESERVED;
-}
-
-static bool
-first_ratio_invalid_for_second_lmul_p (const vector_insn_info &info1,
- const vector_insn_info &info2)
-{
- return calculate_sew (info2.get_vlmul (), info1.get_ratio ()) == 0;
-}
-
static bool
second_ratio_invalid_for_first_sew_p (const vector_insn_info &info1,
const vector_insn_info &info2)
@@ -1314,20 +1288,6 @@ second_lmul_less_than_first_lmul_p (const vector_insn_info &info1,
return compare_lmul (info2.get_vlmul (), info1.get_vlmul ()) == -1;
}
-static bool
-first_lmul_less_than_second_lmul_p (const vector_insn_info &info1,
- const vector_insn_info &info2)
-{
- return compare_lmul (info1.get_vlmul (), info2.get_vlmul ()) == -1;
-}
-
-static bool
-first_ratio_less_than_second_ratio_p (const vector_insn_info &info1,
- const vector_insn_info &info2)
-{
- return info1.get_ratio () < info2.get_ratio ();
-}
-
static bool
second_ratio_less_than_first_ratio_p (const vector_insn_info &info1,
const vector_insn_info &info2)
@@ -1537,6 +1497,100 @@ reg_available_p (const bb_info *bb, const vector_insn_info &info)
insn->bb ()->cfg_bb ());
}
+/* Return true if the instruction supports the relaxed compatibility check. */
+static bool
+support_relaxed_compatible_p (const vector_insn_info &info1,
+ const vector_insn_info &info2)
+{
+ if (fault_first_load_p (info1.get_insn ()->rtl ())
+ && info2.demand_p (DEMAND_AVL) && info2.has_avl_reg ()
+ && info2.get_avl_source () && info2.get_avl_source ()->insn ()->is_phi ())
+ {
+ hash_set<set_info *> sets
+ = get_all_sets (info2.get_avl_source (), true, false, false);
+ for (set_info *set : sets)
+ {
+ if (read_vl_insn_p (set->insn ()->rtl ()))
+ {
+ const insn_info *insn
+ = get_backward_fault_first_load_insn (set->insn ());
+ if (insn == info1.get_insn ())
+ return info2.compatible_vtype_p (info1);
+ }
+ }
+ }
+ return false;
+}
+
+/* Return true if the block is worth applying backward propagation to. */
+static bool
+backward_propagate_worthwhile_p (const basic_block cfg_bb,
+ const vector_block_info block_info)
+{
+ if (loop_basic_block_p (cfg_bb))
+ {
+ if (block_info.reaching_out.valid_or_dirty_p ())
+ {
+ if (block_info.local_dem.compatible_p (block_info.reaching_out))
+ {
+ /* Case 1 (Can backward propagate):
+ ....
+ bb0:
+ ...
+ for (int i = 0; i < n; i++)
+ {
+ vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7);
+ __riscv_vse16_v_i16mf4 (out + i + 5, v, 7);
+ }
+ The local_dem is compatible with reaching_out. Such case is
+ worthwhile backward propagation. */
+ return true;
+ }
+ else
+ {
+ if (support_relaxed_compatible_p (block_info.reaching_out,
+ block_info.local_dem))
+ return true;
+ /* Case 2 (Don't backward propagate):
+ ....
+ bb0:
+ ...
+ for (int i = 0; i < n; i++)
+ {
+ vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7);
+ __riscv_vse16_v_i16mf4 (out + i + 5, v, 7);
+ vint16mf2_t v2 = __riscv_vle16_v_i16mf2 (in + i + 6, 8);
+ __riscv_vse16_v_i16mf2 (out + i + 6, v, 8);
+ }
+ The local_dem is incompatible with reaching_out.
+ It makes no sense to backward propagate the local_dem since we
+ can't avoid VSETVL inside the loop. */
+ return false;
+ }
+ }
+ else
+ {
+ gcc_assert (block_info.reaching_out.unknown_p ());
+ /* Case 3 (Don't backward propagate):
+ ....
+ bb0:
+ ...
+ for (int i = 0; i < n; i++)
+ {
+ vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7);
+ __riscv_vse16_v_i16mf4 (out + i + 5, v, 7);
+ fn3 ();
+ }
+ The local_dem is VALID, but the reaching_out is UNKNOWN.
+ It makes no sense to backward propagate the local_dem since we
+ can't avoid VSETVL inside the loop. */
+ return false;
+ }
+ }
+
+ return true;
+}
+
avl_info::avl_info (const avl_info &other)
{
m_value = other.get_value ();
@@ -1738,27 +1792,29 @@ vl_vtype_info::same_vlmax_p (const vl_vtype_info &other) const
meaning Dem1 is easier be compatible with others than Dem2
or Dem2 is stricter than Dem1.
For example, Dem1 (demand SEW + LMUL) > Dem2 (demand RATIO). */
-bool
-vector_insn_info::operator> (const vector_insn_info &other) const
-{
- if (other.compatible_p (static_cast<const vl_vtype_info &> (*this))
- && !this->compatible_p (static_cast<const vl_vtype_info &> (other)))
- return true;
- return false;
-}
-
bool
vector_insn_info::operator>= (const vector_insn_info &other) const
{
- if (*this > other)
+ if (support_relaxed_compatible_p (*this, other))
+ {
+ unsigned array_size = sizeof (unavailable_conds) / sizeof (demands_cond);
+ /* Bypass AVL unavailable cases. */
+ for (unsigned i = 2; i < array_size; i++)
+ if (unavailable_conds[i].pair.match_cond_p (this->get_demands (),
+ other.get_demands ())
+ && unavailable_conds[i].incompatible_p (*this, other))
+ return false;
+ return true;
+ }
+
+ if (!other.compatible_p (static_cast<const vl_vtype_info &> (*this)))
+ return false;
+ if (!this->compatible_p (static_cast<const vl_vtype_info &> (other)))
return true;
if (*this == other)
return true;
- if (!compatible_p (other))
- return false;
-
for (const auto &cond : unavailable_conds)
if (cond.pair.match_cond_p (this->get_demands (), other.get_demands ())
&& cond.incompatible_p (*this, other))
@@ -1837,7 +1893,7 @@ vector_insn_info::parse_insn (insn_info *insn)
/* If this is something that updates VL/VTYPE that we don't know about, set
the state to unknown. */
- if (!vector_config_insn_p (insn->rtl ())
+ if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ())
&& (find_access (insn->defs (), VL_REGNUM)
|| find_access (insn->defs (), VTYPE_REGNUM)))
{
@@ -1922,8 +1978,7 @@ vector_insn_info::compatible_p (const vector_insn_info &other) const
&& "Can't compare invalid demanded infos");
for (const auto &cond : incompatible_conds)
- if (cond.pair.match_cond_p (this->get_demands (), other.get_demands ())
- && cond.incompatible_p (*this, other))
+ if (cond.dual_incompatible_p (*this, other))
return false;
return true;
}
@@ -2154,6 +2209,23 @@ vector_insn_info::merge (const vector_insn_info &merge_info,
return new_info;
}
+bool
+vector_insn_info::update_fault_first_load_avl (insn_info *insn)
+{
+ // Update AVL to vl-output of the fault first load.
+ const insn_info *read_vl = get_forward_read_vl_insn (insn);
+ if (read_vl)
+ {
+ rtx vl = SET_DEST (PATTERN (read_vl->rtl ()));
+ def_info *def = find_access (read_vl->defs (), REGNO (vl));
+ set_info *set = safe_dyn_cast<set_info *> (def);
+ set_avl_info (avl_info (vl, set));
+ set_insn (insn);
+ return true;
+ }
+ return false;
+}
+
void
vector_insn_info::dump (FILE *file) const
{
@@ -2524,6 +2596,7 @@ private:
bool hard_empty_block_p (const bb_info *, const vector_insn_info &) const;
bool backward_demand_fusion (void);
bool forward_demand_fusion (void);
+ // bool local_demand_fusion (void);
bool cleanup_illegal_dirty_blocks (void);
void demand_fusion (void);
@@ -2664,7 +2737,9 @@ pass_vsetvl::transfer_after (vector_insn_info &info, insn_info *insn) const
return;
}
- /* TODO: Support fault first load info update VL in the future. */
+ if (fault_first_load_p (insn->rtl ())
+ && info.update_fault_first_load_avl (insn))
+ return;
/* If this is something that updates VL/VTYPE that we don't know about, set
the state to unknown. */
@@ -3303,6 +3378,7 @@ pass_vsetvl::demand_fusion (void)
help for such cases. */
changed_p |= backward_demand_fusion ();
changed_p |= forward_demand_fusion ();
+ // changed_p |= local_demand_fusion ();
}
changed_p = true;
@@ -3840,9 +3916,16 @@ pass_vsetvl::cleanup_insns (void) const
use_array new_uses = use_array (uses_builder.finish ());
change.new_uses = new_uses;
change.move_range = insn->ebb ()->insn_range ();
- rtx set = single_set (rinsn);
- rtx src = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx);
- rtx pat = gen_rtx_SET (SET_DEST (set), src);
+ rtx pat;
+ if (fault_first_load_p (rinsn))
+ pat = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx);
+ else
+ {
+ rtx set = single_set (rinsn);
+ rtx src
+ = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx);
+ pat = gen_rtx_SET (SET_DEST (set), src);
+ }
gcc_assert (change_insn (crtl->ssa, change, insn, pat));
}
}
@@ -117,15 +117,6 @@ DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_TRUE, /*SEW*/ DEMAND_ANY,
/*NONZERO_AVL*/ DEMAND_FALSE, /*GE_SEW*/ DEMAND_ANY,
DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
/*COND*/ possible_zero_avl_p)
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_TRUE, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_FALSE, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_TRUE, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_TRUE, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ possible_zero_avl_p)
/* Case 8: First SEW/LMUL/GE_SEW <-> Second RATIO/SEW. */
DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
@@ -156,36 +147,7 @@ DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
/*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
/*COND*/ second_sew_less_than_first_sew_p)
-/* Case 9: Second SEW/LMUL/GE_SEW <-> First RATIO/SEW. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ first_ratio_invalid_for_second_sew_p)
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ first_ratio_invalid_for_second_lmul_p)
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_TRUE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ first_sew_less_than_second_sew_p)
-
-/* Case 10: First (GE_SEW + LMUL) <-> Second RATIO. */
+/* Case 9: First (GE_SEW + LMUL) <-> Second RATIO. */
DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
/*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_ANY,
/*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_TRUE,
@@ -236,58 +198,7 @@ DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
/*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
/*COND*/ first_sew_less_than_second_sew_p)
-/* Case 16: Second (GE_SEW + LMUL) <-> First RATIO. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_TRUE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ first_ratio_less_than_second_ratio_p)
-/* Case 17: Second (SEW + LMUL) <-> First RATIO. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ different_ratio_p)
-/* Case 18: Second (GE_SEW/SEW + RATIO) <-> First LMUL. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ different_lmul_p)
-/* Case 19: Second (LMUL + RATIO) <-> First SEW. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ different_sew_p)
-/* Case 20: Second (LMUL + RATIO) <-> First GE_SEW. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_TRUE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ second_sew_less_than_first_sew_p)
-
-/* Case 18: First SEW + Second LMUL <-> First RATIO. */
+/* Case 16: First SEW + Second LMUL <-> First RATIO. */
DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
/*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_TRUE,
/*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
@@ -297,7 +208,7 @@ DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
/*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
/*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
/*COND*/ different_lmul_p)
-/* Case 19: First SEW + Second LMUL <-> Second RATIO. */
+/* Case 17: First SEW + Second LMUL <-> Second RATIO. */
DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
/*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
/*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
@@ -307,28 +218,8 @@ DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
/*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
/*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
/*COND*/ different_sew_p)
-/* Case 20: Second SEW + First LMUL <-> First RATIO. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ different_sew_p)
-/* Case 21: Second SEW + First LMUL <-> Second RATIO. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ different_lmul_p)
-/* Case 22: First SEW + Second RATIO <-> First LMUL. */
+/* Case 18: First SEW + Second RATIO <-> First LMUL. */
DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
/*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_ANY,
/*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
@@ -338,18 +229,8 @@ DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
/*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
/*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
/*COND*/ different_ratio_p)
-/* Case 23: Second SEW + First RATIO <-> Second LMUL. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ different_ratio_p)
-/* Case 24: First GE_SEW + Second LMUL <-> First RATIO. */
+/* Case 19: First GE_SEW + Second LMUL <-> First RATIO. */
DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
/*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_TRUE,
/*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
@@ -359,7 +240,7 @@ DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
/*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
/*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
/*COND*/ second_lmul_less_than_first_lmul_p)
-/* Case 25: First GE_SEW + Second LMUL <-> Second RATIO. */
+/* Case 20: First GE_SEW + Second LMUL <-> Second RATIO. */
DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
/*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
/*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
@@ -369,28 +250,8 @@ DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
/*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
/*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
/*COND*/ second_sew_less_than_first_sew_p)
-/* Case 26: Second GE_SEW + First LMUL <-> First RATIO. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ first_sew_less_than_second_sew_p)
-/* Case 27: Second GE_SEW + First LMUL <-> Second RATIO. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ first_lmul_less_than_second_lmul_p)
-/* Case 28: First GE_SEW + Second RATIO <-> First LMUL. */
+/* Case 21: First GE_SEW + Second RATIO <-> First LMUL. */
DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
/*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_ANY,
/*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
@@ -400,18 +261,8 @@ DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
/*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
/*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
/*COND*/ second_ratio_less_than_first_ratio_p)
-/* Case 29: Second GE_SEW + First RATIO <-> Second LMUL. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ first_ratio_less_than_second_ratio_p)
-/* Case 31: First GE_SEW + Second SEW + First LMUL + Second ratio. */
+/* Case 22: First GE_SEW + Second SEW + First LMUL + Second ratio. */
DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
/*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_ANY,
/*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_TRUE,
@@ -422,7 +273,7 @@ DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
/*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
/*COND*/ different_lmul_p)
-/* Case 32: First GE_SEW + Second SEW + Second LMUL + First ratio. */
+/* Case 23: First GE_SEW + Second SEW + Second LMUL + First ratio. */
DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
/*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_TRUE,
/*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_TRUE,
@@ -433,28 +284,6 @@ DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
/*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
/*COND*/ different_ratio_p)
-/* Case 33: Second GE_SEW + First SEW + First LMUL + Second ratio. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_TRUE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ different_ratio_p)
-
-/* Case 34: Second GE_SEW + First SEW + Second LMUL + First ratio. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_TRUE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ different_lmul_p)
-
/* Merge rules. */
DEF_SEW_LMUL_FUSE_RULE (/*SEW*/ DEMAND_TRUE, /*LMUL*/ DEMAND_FALSE,
/*RATIO*/ DEMAND_FALSE, /*GE_SEW*/ DEMAND_TRUE,
@@ -308,7 +308,6 @@ public:
We use RTL_SSA framework to initialize the insn_info. */
void parse_insn (rtl_ssa::insn_info *);
- bool operator> (const vector_insn_info &) const;
bool operator>= (const vector_insn_info &) const;
bool operator== (const vector_insn_info &) const;
@@ -392,6 +391,7 @@ public:
{
return gen_rtx_REG (Pmode, get_avl_source ()->regno ());
}
+ bool update_fault_first_load_avl (rtl_ssa::insn_info *);
void dump (FILE *) const;
};
@@ -479,6 +479,14 @@ struct demands_cond
using CONDITION_TYPE
= bool (*) (const vector_insn_info &, const vector_insn_info &);
CONDITION_TYPE incompatible_p;
+ bool dual_incompatible_p (const vector_insn_info &info1,
+ const vector_insn_info &info2) const
+ {
+ return ((pair.match_cond_p (info1.get_demands (), info2.get_demands ())
+ && incompatible_p (info1, info2))
+ || (pair.match_cond_p (info2.get_demands (), info1.get_demands ())
+ && incompatible_p (info2, info1)));
+ }
};
struct demands_fuse_rule
@@ -7069,6 +7069,9 @@ riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask)
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL riscv_builtin_decl
+#undef TARGET_GIMPLE_FOLD_BUILTIN
+#define TARGET_GIMPLE_FOLD_BUILTIN riscv_gimple_fold_builtin
+
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN riscv_expand_builtin
@@ -1363,7 +1363,9 @@
[(set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI
(match_operand:SI 1 "nonimmediate_operand" " r,m")))]
- "TARGET_64BIT && !TARGET_ZBA"
+ "TARGET_64BIT && !TARGET_ZBA
+ && !(REG_P (operands[1])
+ && REGNO (operands[1]) == VL_REGNUM)"
"@
#
lwu\t%0,%1"
@@ -1743,7 +1745,9 @@
[(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r, m, *f,*f,*r,*m,r")
(match_operand:SI 1 "move_operand" " r,T,m,rJ,*r*J,*m,*f,*f,vp"))]
"(register_operand (operands[0], SImode)
- || reg_or_0_operand (operands[1], SImode))"
+ || reg_or_0_operand (operands[1], SImode))
+ && !(register_operand (operands[1], SImode)
+ && REGNO (operands[1]) == VL_REGNUM)"
{ return riscv_output_move (operands[0], operands[1]); }
[(set_attr "move_type" "move,const,load,store,mtc,fpload,mfc,fpstore,rdvlenb")
(set_attr "mode" "SI")
@@ -9,7 +9,8 @@ riscv-vector-builtins.o: $(srcdir)/config/riscv/riscv-vector-builtins.cc \
$(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_H) $(TM_P_H) \
memmodel.h insn-codes.h $(OPTABS_H) $(RECOG_H) $(DIAGNOSTIC_H) $(EXPR_H) \
$(FUNCTION_H) fold-const.h gimplify.h explow.h stor-layout.h $(REGS_H) \
- alias.h langhooks.h attribs.h stringpool.h emit-rtl.h \
+ alias.h langhooks.h attribs.h stringpool.h emit-rtl.h basic-block.h \
+ gimple.h gimple-iterator.h \
$(srcdir)/config/riscv/riscv-vector-builtins.h \
$(srcdir)/config/riscv/riscv-vector-builtins-shapes.h \
$(srcdir)/config/riscv/riscv-vector-builtins-bases.h \
@@ -79,6 +79,7 @@
UNSPEC_VRGATHER
UNSPEC_VRGATHEREI16
UNSPEC_VCOMPRESS
+ UNSPEC_VLEFF
])
(define_mode_iterator V [
@@ -215,7 +215,7 @@
vfwcvtftoi,vfwcvtftof,vfncvtitof,vfncvtftoi,vfncvtftof,vfclass,\
vired,viwred,vfredu,vfredo,vfwredu,vfwredo,vimovxv,vfmovfv,\
vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down,\
- vgather")
+ vgather,vldff")
(const_int 2)
(eq_attr "type" "vimerge,vfmerge,vcompress")
@@ -228,7 +228,7 @@
;; The index of operand[] to get the avl op.
(define_attr "vl_op_idx" ""
(cond [(eq_attr "type" "vlde,vste,vimov,vfmov,vldm,vstm,vmalu,vsts,vstux,\
- vstox,vext,vmsfs,vmiota,vfsqrt,vfrecp,vfcvtitof,\
+ vstox,vext,vmsfs,vmiota,vfsqrt,vfrecp,vfcvtitof,vldff,\
vfcvtftoi,vfwcvtitof,vfwcvtftoi,vfwcvtftof,vfncvtitof,\
vfncvtftoi,vfncvtftof,vfclass,vimovxv,vfmovfv,vcompress")
(const_int 4)
@@ -260,7 +260,7 @@
(cond [(eq_attr "type" "vlde,vimov,vfmov,vext,vmiota,vfsqrt,vfrecp,\
vfcvtitof,vfcvtftoi,vfwcvtitof,vfwcvtftoi,vfwcvtftof,\
vfncvtitof,vfncvtftoi,vfncvtftof,vfclass,vimovxv,vfmovfv,\
- vcompress")
+ vcompress,vldff")
(symbol_ref "riscv_vector::get_ta(operands[5])")
;; If operands[3] of "vlds" is not vector mode, it is pred_broadcast.
@@ -289,7 +289,7 @@
(define_attr "ma" ""
(cond [(eq_attr "type" "vlde,vext,vmiota,vfsqrt,vfrecp,vfcvtitof,vfcvtftoi,\
vfwcvtitof,vfwcvtftoi,vfwcvtftof,vfncvtitof,vfncvtftoi,\
- vfncvtftof,vfclass")
+ vfncvtftof,vfclass,vldff")
(symbol_ref "riscv_vector::get_ma(operands[6])")
;; If operands[3] of "vlds" is not vector mode, it is pred_broadcast.
@@ -315,7 +315,7 @@
;; The avl type value.
(define_attr "avl_type" ""
- (cond [(eq_attr "type" "vlde,vlde,vste,vimov,vimov,vimov,vfmov,vext,vimerge,\
+ (cond [(eq_attr "type" "vlde,vldff,vste,vimov,vimov,vimov,vfmov,vext,vimerge,\
vfsqrt,vfrecp,vfmerge,vfcvtitof,vfcvtftoi,vfwcvtitof,\
vfwcvtftoi,vfwcvtftof,vfncvtitof,vfncvtftoi,vfncvtftof,\
vfclass,vired,viwred,vfredu,vfredo,vfwredu,vfwredo,\
@@ -6920,3 +6920,46 @@
"vcompress.vm\t%0,%2,%3"
[(set_attr "type" "vcompress")
(set_attr "mode" "<MODE>")])
+
+;; -------------------------------------------------------------------------------
+;; ---- Predicated Fault-Only-First loads
+;; -------------------------------------------------------------------------------
+;; Includes:
+;; - 7.7. Unit-stride Fault-Only-First Loads
+;; -------------------------------------------------------------------------------
+
+(define_insn "read_vlsi"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (reg:SI VL_REGNUM))]
+ "TARGET_VECTOR"
+ "csrr\t%0,vl"
+ [(set_attr "type" "rdvl")
+ (set_attr "mode" "SI")])
+
+(define_insn "read_vldi_zero_extend"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (reg:SI VL_REGNUM)))]
+ "TARGET_VECTOR && TARGET_64BIT"
+ "csrr\t%0,vl"
+ [(set_attr "type" "rdvl")
+ (set_attr "mode" "DI")])
+
+(define_insn "@pred_fault_load<mode>"
+ [(set (match_operand:V 0 "register_operand" "=vd, vr")
+ (if_then_else:V
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1")
+ (match_operand 4 "vector_length_operand" " rK, rK")
+ (match_operand 5 "const_int_operand" " i, i")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:V
+ [(match_operand:V 3 "memory_operand" " m, m")] UNSPEC_VLEFF)
+ (match_operand:V 2 "vector_merge_operand" " 0vu, 0vu")))
+ (set (reg:SI VL_REGNUM) (unspec:SI [(match_dup 0)] UNSPEC_VLEFF))]
+ "TARGET_VECTOR"
+ "vle<sew>ff.v\t%0,%3%p1"
+ [(set_attr "type" "vldff")
+ (set_attr "mode" "<MODE>")])