@@ -18,60 +18,47 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-/* This pass is to Set VL/VTYPE global status for RVV instructions
- that depend on VL and VTYPE registers by Lazy code motion (LCM).
-
- Strategy:
-
- - Backward demanded info fusion within block.
-
- - Lazy code motion (LCM) based demanded info backward propagation.
-
- - RTL_SSA framework for def-use, PHI analysis.
-
- - Lazy code motion (LCM) for global VL/VTYPE optimization.
-
- Assumption:
-
- - Each avl operand is either an immediate (must be in range 0 ~ 31) or reg.
-
- This pass consists of 5 phases:
-
- - Phase 1 - compute VL/VTYPE demanded information within each block
- by backward data-flow analysis.
-
- - Phase 2 - Emit vsetvl instructions within each basic block according to
- demand, compute and save ANTLOC && AVLOC of each block.
-
- - Phase 3 - LCM Earliest-edge baseed VSETVL demand fusion.
-
- - Phase 4 - Lazy code motion including: compute local properties,
- pre_edge_lcm and vsetvl insertion && delete edges for LCM results.
-
- - Phase 5 - Cleanup AVL operand of RVV instruction since it will not be
- used any more and VL operand of VSETVL instruction if it is not used by
- any non-debug instructions.
-
- - Phase 6 - DF based post VSETVL optimizations.
-
- Implementation:
-
- - The subroutine of optimize == 0 is simple_vsetvl.
- This function simplily vsetvl insertion for each RVV
- instruction. No optimization.
-
- - The subroutine of optimize > 0 is lazy_vsetvl.
- This function optimize vsetvl insertion process by
- lazy code motion (LCM) layering on RTL_SSA.
-
- - get_avl (), get_insn (), get_avl_source ():
-
- 1. get_insn () is the current instruction, find_access (get_insn
- ())->def is the same as get_avl_source () if get_insn () demand VL.
- 2. If get_avl () is non-VLMAX REG, get_avl () == get_avl_source
- ()->regno ().
- 3. get_avl_source ()->regno () is the REGNO that we backward propagate.
- */
+/* The values of the vl and vtype registers will affect the behavior of RVV
+ insns. That is, when we need to execute an RVV instruction, we need to set
+ the correct vl and vtype values by executing the vsetvl instruction before.
+ Executing the fewest number of vsetvl instructions while keeping the behavior
+ the same is the problem this pass is trying to solve. This vsetvl pass is
+ divided into 5 phases:
+
+ - Phase 1 (fuse local vsetvl infos): traverses each Basic Block, parses
+ each instruction in it that affects vl and vtype state and generates an
+ array of vsetvl_info objects. Then traverse the vsetvl_info array from
+ front to back and perform fusion according to the fusion rules. The fused
+ vsetvl infos are stored in the vsetvl_block_info object's `infos` field.
+
+ - Phase 2 (earliest fuse global vsetvl infos): The header_info and
+ footer_info of vsetvl_block_info are used as expressions, and the
+ earliest of each expression is computed. Based on the earliest
+ information, try to lift up the corresponding vsetvl info to the src
+ basic block of the edge (mainly to reduce the total number of vsetvl
+ instructions; note that this uplift can cause some execution paths to
+ execute vsetvl instructions they would not otherwise execute).
+
+ - Phase 3 (pre global vsetvl info): The header_info and footer_info of
+ vsetvl_block_info are used as expressions, and the LCM algorithm is used
+ to compute the header_info that needs to be deleted and the one that
+ needs to be inserted in some edges.
+
+ - Phase 4 (emit vsetvl insns): Based on the fusion result of Phase 1 and
+ the deletion and insertion information of Phase 3, the mandatory vsetvl
+ instruction insertion, modification and deletion are performed.
+
+ - Phase 5 (cleanup): Clean up the avl operand in the RVV operator
+ instruction and cleanup the unused dest operand of the vsetvl insn.
+
+ After Phase 1, a virtual CFG of vsetvl_info is generated. Each virtual
+ basic block is represented by a vsetvl_block_info, and the virtual vsetvl
+ statements inside it are represented by vsetvl_info. Phases 2 and 3
+ repeatedly modify and adjust this virtual CFG. Phase 4 performs
+ insertion, modification and deletion of vsetvl instructions based on the
+ optimized virtual CFG. Phases 1, 2 and 3 do not involve modifications to
+ the RTL.
+*/
#define IN_TARGET_CODE 1
#define INCLUDE_ALGORITHM
@@ -98,61 +85,180 @@ along with GCC; see the file COPYING3. If not see
#include "predict.h"
#include "profile-count.h"
#include "gcse.h"
-#include "riscv-vsetvl.h"
using namespace rtl_ssa;
using namespace riscv_vector;
-static CONSTEXPR const unsigned ALL_SEW[] = {8, 16, 32, 64};
-static CONSTEXPR const vlmul_type ALL_LMUL[]
- = {LMUL_1, LMUL_2, LMUL_4, LMUL_8, LMUL_F8, LMUL_F4, LMUL_F2};
+/* Set the bitmap DST to the union of SRC of predecessors of
+ basic block B.
+ It's a bit different from bitmap_union_of_preds in cfganal.cc: this function
+ also takes into account a pred that is the ENTRY basic block. The main reason
+ for this difference is to make it easier to insert some special value into
+ the ENTRY basic block, for example a vsetvl_info with a status of UNKNOWN. */
+static void
+bitmap_union_of_preds_with_entry (sbitmap dst, sbitmap *src, basic_block b)
+{
+ unsigned int set_size = dst->size;
+ edge e;
+ unsigned ix;
+
+ for (ix = 0; ix < EDGE_COUNT (b->preds); ix++)
+ {
+ e = EDGE_PRED (b, ix);
+ bitmap_copy (dst, src[e->src->index]);
+ break;
+ }
-DEBUG_FUNCTION void
-debug (const vector_insn_info *info)
+ if (ix == EDGE_COUNT (b->preds))
+ bitmap_clear (dst);
+ else
+ for (ix++; ix < EDGE_COUNT (b->preds); ix++)
+ {
+ unsigned int i;
+ SBITMAP_ELT_TYPE *p, *r;
+
+ e = EDGE_PRED (b, ix);
+ p = src[e->src->index]->elms;
+ r = dst->elms;
+ for (i = 0; i < set_size; i++)
+ *r++ |= *p++;
+ }
+}
+
+/* Compute the reaching definition IN and OUT sets based on the GEN and KILL
+ information of each basic block.
+ This function follows the compute_available implementation in lcm.cc. */
+static void
+compute_reaching_defintion (sbitmap *gen, sbitmap *kill, sbitmap *in,
+ sbitmap *out)
{
- info->dump (stderr);
+ edge e;
+ basic_block *worklist, *qin, *qout, *qend, bb;
+ unsigned int qlen;
+ edge_iterator ei;
+
+ /* Allocate a worklist array/queue. Entries are only added to the
+ list if they were not already on the list. So the size is
+ bounded by the number of basic blocks. */
+ qin = qout = worklist
+ = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
+
+ /* Put every block on the worklist; this is necessary because of the
+ optimistic initialization of AVOUT above. Use reverse postorder
+ to make the forward dataflow problem require fewer iterations. */
+ int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
+ int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false);
+ for (int i = 0; i < n; ++i)
+ {
+ bb = BASIC_BLOCK_FOR_FN (cfun, rpo[i]);
+ *qin++ = bb;
+ bb->aux = bb;
+ }
+ free (rpo);
+
+ qin = worklist;
+ qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS];
+ qlen = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;
+
+ /* Mark blocks which are successors of the entry block so that we
+ can easily identify them below. */
+ FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
+ e->dest->aux = ENTRY_BLOCK_PTR_FOR_FN (cfun);
+
+ /* Iterate until the worklist is empty. */
+ while (qlen)
+ {
+ /* Take the first entry off the worklist. */
+ bb = *qout++;
+ qlen--;
+
+ if (qout >= qend)
+ qout = worklist;
+
+ /* Do not clear the aux field for blocks which are successors of the
+ ENTRY block. That way we never add them to the worklist again. */
+ if (bb->aux != ENTRY_BLOCK_PTR_FOR_FN (cfun))
+ bb->aux = NULL;
+
+ bitmap_union_of_preds_with_entry (in[bb->index], out, bb);
+
+ if (bitmap_ior_and_compl (out[bb->index], gen[bb->index], in[bb->index],
+ kill[bb->index]))
+ /* If the out state of this block changed, then we need
+ to add the successors of this block to the worklist
+ if they are not already on the worklist. */
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
+ {
+ *qin++ = e->dest;
+ e->dest->aux = e;
+ qlen++;
+
+ if (qin >= qend)
+ qin = worklist;
+ }
+ }
+
+ clear_aux_for_edges ();
+ clear_aux_for_blocks ();
+ free (worklist);
}
-DEBUG_FUNCTION void
-debug (const vector_infos_manager *info)
+/* Classification of vsetvl instruction. */
+enum vsetvl_type
{
- info->dump (stderr);
-}
+ VSETVL_NORMAL,
+ VSETVL_VTYPE_CHANGE_ONLY,
+ VSETVL_DISCARD_RESULT,
+ NUM_VSETVL_TYPE
+};
-static bool
-vlmax_avl_p (rtx x)
+enum emit_type
{
- return x && rtx_equal_p (x, RVV_VLMAX);
+ /* emit_insn directly. */
+ EMIT_DIRECT,
+ EMIT_BEFORE,
+ EMIT_AFTER,
+};
+
+/* dump helper functions */
+static const char *
+vlmul_to_str (vlmul_type vlmul)
+{
+ switch (vlmul)
+ {
+ case LMUL_1:
+ return "m1";
+ case LMUL_2:
+ return "m2";
+ case LMUL_4:
+ return "m4";
+ case LMUL_8:
+ return "m8";
+ case LMUL_RESERVED:
+ return "INVALID LMUL";
+ case LMUL_F8:
+ return "mf8";
+ case LMUL_F4:
+ return "mf4";
+ case LMUL_F2:
+ return "mf2";
+
+ default:
+ gcc_unreachable ();
+ }
}
-static bool
-vlmax_avl_insn_p (rtx_insn *rinsn)
+static const char *
+policy_to_str (bool agnostic_p)
{
- return (INSN_CODE (rinsn) == CODE_FOR_vlmax_avlsi
- || INSN_CODE (rinsn) == CODE_FOR_vlmax_avldi);
+ return agnostic_p ? "agnostic" : "undisturbed";
}
-/* Return true if the block is a loop itself:
- local_dem
- __________
- ____|____ |
- | | |
- |________| |
- |_________|
- reaching_out
-*/
static bool
-loop_basic_block_p (const basic_block cfg_bb)
+vlmax_avl_p (rtx x)
{
- if (JUMP_P (BB_END (cfg_bb)) && any_condjump_p (BB_END (cfg_bb)))
- {
- edge e;
- edge_iterator ei;
- FOR_EACH_EDGE (e, ei, cfg_bb->succs)
- if (e->dest->index == cfg_bb->index)
- return true;
- }
- return false;
+ return x && rtx_equal_p (x, RVV_VLMAX);
}
/* Return true if it is an RVV instruction depends on VTYPE global
@@ -171,13 +277,6 @@ has_vl_op (rtx_insn *rinsn)
return recog_memoized (rinsn) >= 0 && get_attr_has_vl_op (rinsn);
}
-/* Is this a SEW value that can be encoded into the VTYPE format. */
-static bool
-valid_sew_p (size_t sew)
-{
- return exact_log2 (sew) && sew >= 8 && sew <= 64;
-}
-
/* Return true if the instruction ignores VLMUL field of VTYPE. */
static bool
ignore_vlmul_insn_p (rtx_insn *rinsn)
@@ -223,7 +322,7 @@ vector_config_insn_p (rtx_insn *rinsn)
static bool
vsetvl_insn_p (rtx_insn *rinsn)
{
- if (!vector_config_insn_p (rinsn))
+ if (!rinsn || !vector_config_insn_p (rinsn))
return false;
return (INSN_CODE (rinsn) == CODE_FOR_vsetvldi
|| INSN_CODE (rinsn) == CODE_FOR_vsetvlsi);
@@ -239,34 +338,13 @@ vsetvl_discard_result_insn_p (rtx_insn *rinsn)
|| INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultsi);
}
-/* Return true if it is vsetvl zero, zero. */
-static bool
-vsetvl_vtype_change_only_p (rtx_insn *rinsn)
-{
- if (!vector_config_insn_p (rinsn))
- return false;
- return (INSN_CODE (rinsn) == CODE_FOR_vsetvl_vtype_change_only);
-}
-
-static bool
-after_or_same_p (const insn_info *insn1, const insn_info *insn2)
-{
- return insn1->compare_with (insn2) >= 0;
-}
-
static bool
real_insn_and_same_bb_p (const insn_info *insn, const bb_info *bb)
{
return insn != nullptr && insn->is_real () && insn->bb () == bb;
}
-static bool
-before_p (const insn_info *insn1, const insn_info *insn2)
-{
- return insn1->compare_with (insn2) < 0;
-}
-
-/* Helper function to get VL operand. */
+/* Helper function to get VL operand for VLMAX insn. */
static rtx
get_vl (rtx_insn *rinsn)
{
@@ -278,224 +356,6 @@ get_vl (rtx_insn *rinsn)
return SET_DEST (XVECEXP (PATTERN (rinsn), 0, 0));
}
-/* An "anticipatable occurrence" is one that is the first occurrence in the
- basic block, the operands are not modified in the basic block prior
- to the occurrence and the output is not used between the start of
- the block and the occurrence.
-
- For VSETVL instruction, we have these following formats:
- 1. vsetvl zero, rs1.
- 2. vsetvl zero, imm.
- 3. vsetvl rd, rs1.
-
- So base on these circumstances, a DEM is considered as a local anticipatable
- occurrence should satisfy these following conditions:
-
- 1). rs1 (avl) are not modified in the basic block prior to the VSETVL.
- 2). rd (vl) are not modified in the basic block prior to the VSETVL.
- 3). rd (vl) is not used between the start of the block and the occurrence.
-
- Note: We don't need to check VL/VTYPE here since DEM is UNKNOWN if VL/VTYPE
- is modified prior to the occurrence. This case is already considered as
- a non-local anticipatable occurrence.
-*/
-static bool
-anticipatable_occurrence_p (const bb_info *bb, const vector_insn_info dem)
-{
- insn_info *insn = dem.get_insn ();
- /* The only possible operand we care of VSETVL is AVL. */
- if (dem.has_avl_reg ())
- {
- /* rs1 (avl) are not modified in the basic block prior to the VSETVL. */
- rtx avl = dem.get_avl_or_vl_reg ();
- if (dem.dirty_p ())
- {
- gcc_assert (!vsetvl_insn_p (insn->rtl ()));
-
- /* Earliest VSETVL will be inserted at the end of the block. */
- for (const insn_info *i : bb->real_nondebug_insns ())
- {
- /* rs1 (avl) are not modified in the basic block prior to the
- VSETVL. */
- if (find_access (i->defs (), REGNO (avl)))
- return false;
- if (vlmax_avl_p (dem.get_avl ()))
- {
- /* rd (avl) is not used between the start of the block and
- the occurrence. Note: Only for Dirty and VLMAX-avl. */
- if (find_access (i->uses (), REGNO (avl)))
- return false;
- }
- }
-
- return true;
- }
- else if (!vlmax_avl_p (avl))
- {
- set_info *set = dem.get_avl_source ();
- /* If it's undefined, it's not anticipatable conservatively. */
- if (!set)
- return false;
- if (real_insn_and_same_bb_p (set->insn (), bb)
- && before_p (set->insn (), insn))
- return false;
- for (insn_info *i = insn->prev_nondebug_insn ();
- real_insn_and_same_bb_p (i, bb); i = i->prev_nondebug_insn ())
- {
- /* rs1 (avl) are not modified in the basic block prior to the
- VSETVL. */
- if (find_access (i->defs (), REGNO (avl)))
- return false;
- }
- }
- }
-
- /* rd (vl) is not used between the start of the block and the occurrence. */
- if (vsetvl_insn_p (insn->rtl ()))
- {
- rtx dest = get_vl (insn->rtl ());
- for (insn_info *i = insn->prev_nondebug_insn ();
- real_insn_and_same_bb_p (i, bb); i = i->prev_nondebug_insn ())
- {
- /* rd (vl) is not used between the start of the block and the
- * occurrence. */
- if (find_access (i->uses (), REGNO (dest)))
- return false;
- /* rd (vl) are not modified in the basic block prior to the VSETVL. */
- if (find_access (i->defs (), REGNO (dest)))
- return false;
- }
- }
-
- return true;
-}
-
-/* An "available occurrence" is one that is the last occurrence in the
- basic block and the operands are not modified by following statements in
- the basic block [including this insn].
-
- For VSETVL instruction, we have these following formats:
- 1. vsetvl zero, rs1.
- 2. vsetvl zero, imm.
- 3. vsetvl rd, rs1.
-
- So base on these circumstances, a DEM is considered as a local available
- occurrence should satisfy these following conditions:
-
- 1). rs1 (avl) are not modified by following statements in
- the basic block.
- 2). rd (vl) are not modified by following statements in
- the basic block.
-
- Note: We don't need to check VL/VTYPE here since DEM is UNKNOWN if VL/VTYPE
- is modified prior to the occurrence. This case is already considered as
- a non-local available occurrence.
-*/
-static bool
-available_occurrence_p (const bb_info *bb, const vector_insn_info dem)
-{
- insn_info *insn = dem.get_insn ();
- /* The only possible operand we care of VSETVL is AVL. */
- if (dem.has_avl_reg ())
- {
- if (!vlmax_avl_p (dem.get_avl ()))
- {
- rtx dest = NULL_RTX;
- insn_info *i = insn;
- if (vsetvl_insn_p (insn->rtl ()))
- {
- dest = get_vl (insn->rtl ());
- /* For user vsetvl a2, a2 instruction, we consider it as
- available even though it modifies "a2". */
- i = i->next_nondebug_insn ();
- }
- for (; real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ())
- {
- if (read_vl_insn_p (i->rtl ()))
- continue;
- /* rs1 (avl) are not modified by following statements in
- the basic block. */
- if (find_access (i->defs (), REGNO (dem.get_avl ())))
- return false;
- /* rd (vl) are not modified by following statements in
- the basic block. */
- if (dest && find_access (i->defs (), REGNO (dest)))
- return false;
- }
- }
- }
- return true;
-}
-
-static bool
-insn_should_be_added_p (const insn_info *insn, unsigned int types)
-{
- if (insn->is_real () && (types & REAL_SET))
- return true;
- if (insn->is_phi () && (types & PHI_SET))
- return true;
- if (insn->is_bb_head () && (types & BB_HEAD_SET))
- return true;
- if (insn->is_bb_end () && (types & BB_END_SET))
- return true;
- return false;
-}
-
-/* Recursively find all define instructions. The kind of instruction is
- specified by the DEF_TYPE. */
-static hash_set<set_info *>
-get_all_sets (phi_info *phi, unsigned int types)
-{
- hash_set<set_info *> insns;
- auto_vec<phi_info *> work_list;
- hash_set<phi_info *> visited_list;
- if (!phi)
- return hash_set<set_info *> ();
- work_list.safe_push (phi);
-
- while (!work_list.is_empty ())
- {
- phi_info *phi = work_list.pop ();
- visited_list.add (phi);
- for (use_info *use : phi->inputs ())
- {
- def_info *def = use->def ();
- set_info *set = safe_dyn_cast<set_info *> (def);
- if (!set)
- return hash_set<set_info *> ();
-
- gcc_assert (!set->insn ()->is_debug_insn ());
-
- if (insn_should_be_added_p (set->insn (), types))
- insns.add (set);
- if (set->insn ()->is_phi ())
- {
- phi_info *new_phi = as_a<phi_info *> (set);
- if (!visited_list.contains (new_phi))
- work_list.safe_push (new_phi);
- }
- }
- }
- return insns;
-}
-
-static hash_set<set_info *>
-get_all_sets (set_info *set, bool /* get_real_inst */ real_p,
- bool /*get_phi*/ phi_p, bool /* get_function_parameter*/ param_p)
-{
- if (real_p && phi_p && param_p)
- return get_all_sets (safe_dyn_cast<phi_info *> (set),
- REAL_SET | PHI_SET | BB_HEAD_SET | BB_END_SET);
-
- else if (real_p && param_p)
- return get_all_sets (safe_dyn_cast<phi_info *> (set),
- REAL_SET | BB_HEAD_SET | BB_END_SET);
-
- else if (real_p)
- return get_all_sets (safe_dyn_cast<phi_info *> (set), REAL_SET);
- return hash_set<set_info *> ();
-}
-
/* Helper function to get AVL operand. */
static rtx
get_avl (rtx_insn *rinsn)
@@ -511,15 +371,6 @@ get_avl (rtx_insn *rinsn)
return recog_data.operand[get_attr_vl_op_idx (rinsn)];
}
-static set_info *
-get_same_bb_set (hash_set<set_info *> &sets, const basic_block cfg_bb)
-{
- for (set_info *set : sets)
- if (set->bb ()->cfg_bb () == cfg_bb)
- return set;
- return nullptr;
-}
-
/* Helper function to get SEW operand. We always have SEW value for
all RVV instructions that have VTYPE OP. */
static uint8_t
@@ -589,365 +440,186 @@ has_vector_insn (function *fn)
return false;
}
-/* Emit vsetvl instruction. */
-static rtx
-gen_vsetvl_pat (enum vsetvl_type insn_type, const vl_vtype_info &info, rtx vl)
+static vlmul_type
+calculate_vlmul (unsigned int sew, unsigned int ratio)
{
- rtx avl = info.get_avl ();
- /* if optimization == 0 and the instruction is vmv.x.s/vfmv.f.s,
- set the value of avl to (const_int 0) so that VSETVL PASS will
- insert vsetvl correctly.*/
- if (info.has_avl_no_reg ())
- avl = GEN_INT (0);
- rtx sew = gen_int_mode (info.get_sew (), Pmode);
- rtx vlmul = gen_int_mode (info.get_vlmul (), Pmode);
- rtx ta = gen_int_mode (info.get_ta (), Pmode);
- rtx ma = gen_int_mode (info.get_ma (), Pmode);
-
- if (insn_type == VSETVL_NORMAL)
- {
- gcc_assert (vl != NULL_RTX);
- return gen_vsetvl (Pmode, vl, avl, sew, vlmul, ta, ma);
- }
- else if (insn_type == VSETVL_VTYPE_CHANGE_ONLY)
- return gen_vsetvl_vtype_change_only (sew, vlmul, ta, ma);
- else
- return gen_vsetvl_discard_result (Pmode, avl, sew, vlmul, ta, ma);
+ const vlmul_type ALL_LMUL[]
+ = {LMUL_1, LMUL_2, LMUL_4, LMUL_8, LMUL_F8, LMUL_F4, LMUL_F2};
+ for (const vlmul_type vlmul : ALL_LMUL)
+ if (calculate_ratio (sew, vlmul) == ratio)
+ return vlmul;
+ return LMUL_RESERVED;
}
-static rtx
-gen_vsetvl_pat (rtx_insn *rinsn, const vector_insn_info &info,
- rtx vl = NULL_RTX)
+/* Get the currently supported maximum sew used in the int rvv instructions. */
+static uint8_t
+get_max_int_sew ()
{
- rtx new_pat;
- vl_vtype_info new_info = info;
- if (info.get_insn () && info.get_insn ()->rtl ()
- && fault_first_load_p (info.get_insn ()->rtl ()))
- new_info.set_avl_info (
- avl_info (get_avl (info.get_insn ()->rtl ()), nullptr));
- if (vl)
- new_pat = gen_vsetvl_pat (VSETVL_NORMAL, new_info, vl);
- else
- {
- if (vsetvl_insn_p (rinsn))
- new_pat = gen_vsetvl_pat (VSETVL_NORMAL, new_info, get_vl (rinsn));
- else if (INSN_CODE (rinsn) == CODE_FOR_vsetvl_vtype_change_only)
- new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, new_info, NULL_RTX);
- else
- new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, new_info, NULL_RTX);
- }
- return new_pat;
+ if (TARGET_VECTOR_ELEN_64)
+ return 64;
+ else if (TARGET_VECTOR_ELEN_32)
+ return 32;
+ gcc_unreachable ();
}
-static void
-emit_vsetvl_insn (enum vsetvl_type insn_type, enum emit_type emit_type,
- const vl_vtype_info &info, rtx vl, rtx_insn *rinsn)
-{
- rtx pat = gen_vsetvl_pat (insn_type, info, vl);
- if (dump_file)
- {
- fprintf (dump_file, "\nInsert vsetvl insn PATTERN:\n");
- print_rtl_single (dump_file, pat);
- fprintf (dump_file, "\nfor insn:\n");
- print_rtl_single (dump_file, rinsn);
- }
-
- if (emit_type == EMIT_DIRECT)
- emit_insn (pat);
- else if (emit_type == EMIT_BEFORE)
- emit_insn_before (pat, rinsn);
- else
- emit_insn_after (pat, rinsn);
+/* Get the currently supported maximum sew used in the float rvv instructions.
+ */
+static uint8_t
+get_max_float_sew ()
+{
+ if (TARGET_VECTOR_ELEN_FP_64)
+ return 64;
+ else if (TARGET_VECTOR_ELEN_FP_32)
+ return 32;
+ else if (TARGET_VECTOR_ELEN_FP_16)
+ return 16;
+ gcc_unreachable ();
}
-static void
-eliminate_insn (rtx_insn *rinsn)
+/* Helper function to get VL operand. */
+static rtx
+get_vl2 (rtx_insn *rinsn)
{
- if (dump_file)
+ if (has_vl_op (rinsn))
{
- fprintf (dump_file, "\nEliminate insn %d:\n", INSN_UID (rinsn));
- print_rtl_single (dump_file, rinsn);
+ extract_insn_cached (rinsn);
+ return recog_data.operand[get_attr_vl_op_idx (rinsn)];
}
- if (in_sequence_p ())
- remove_insn (rinsn);
- else
- delete_insn (rinsn);
+ return SET_DEST (XVECEXP (PATTERN (rinsn), 0, 0));
}
-static vsetvl_type
-insert_vsetvl (enum emit_type emit_type, rtx_insn *rinsn,
- const vector_insn_info &info, const vector_insn_info &prev_info)
+/* Count the number of operands of RINSN that refer to REGNO. */
+static int
+count_regno_occurrences (rtx_insn *rinsn, unsigned int regno)
{
- /* Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
- VLMAX. */
- if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p ()
- && info.compatible_avl_p (prev_info) && info.same_vlmax_p (prev_info))
- {
- emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX,
- rinsn);
- return VSETVL_VTYPE_CHANGE_ONLY;
- }
-
- if (info.has_avl_imm ())
- {
- emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX,
- rinsn);
- return VSETVL_DISCARD_RESULT;
- }
-
- if (info.has_avl_no_reg ())
- {
- /* We can only use x0, x0 if there's no chance of the vtype change causing
- the previous vl to become invalid. */
- if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p ()
- && info.same_vlmax_p (prev_info))
- {
- emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX,
- rinsn);
- return VSETVL_VTYPE_CHANGE_ONLY;
- }
- /* Otherwise use an AVL of 0 to avoid depending on previous vl. */
- vl_vtype_info new_info = info;
- new_info.set_avl_info (avl_info (const0_rtx, nullptr));
- emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, new_info, NULL_RTX,
- rinsn);
- return VSETVL_DISCARD_RESULT;
- }
-
- /* Use X0 as the DestReg unless AVLReg is X0. We also need to change the
- opcode if the AVLReg is X0 as they have different register classes for
- the AVL operand. */
- if (vlmax_avl_p (info.get_avl ()))
- {
- gcc_assert (has_vtype_op (rinsn) || vsetvl_insn_p (rinsn));
- /* For user vsetvli a5, zero, we should use get_vl to get the VL
- operand "a5". */
- rtx vl_op = info.get_avl_or_vl_reg ();
- gcc_assert (!vlmax_avl_p (vl_op));
- emit_vsetvl_insn (VSETVL_NORMAL, emit_type, info, vl_op, rinsn);
- return VSETVL_NORMAL;
- }
-
- emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX, rinsn);
-
- if (dump_file)
- {
- fprintf (dump_file, "Update VL/VTYPE info, previous info=");
- prev_info.dump (dump_file);
- }
- return VSETVL_DISCARD_RESULT;
+ int count = 0;
+ extract_insn (rinsn);
+ for (int i = 0; i < recog_data.n_operands; i++)
+ if (refers_to_regno_p (regno, recog_data.operand[i]))
+ count++;
+ return count;
}
-/* Get VL/VTYPE information for INSN. */
-static vl_vtype_info
-get_vl_vtype_info (const insn_info *insn)
+enum def_type
{
- set_info *set = nullptr;
- rtx avl = ::get_avl (insn->rtl ());
- if (avl && REG_P (avl))
- {
- if (vlmax_avl_p (avl) && has_vl_op (insn->rtl ()))
- set
- = find_access (insn->uses (), REGNO (get_vl (insn->rtl ())))->def ();
- else if (!vlmax_avl_p (avl))
- set = find_access (insn->uses (), REGNO (avl))->def ();
- else
- set = nullptr;
- }
-
- uint8_t sew = get_sew (insn->rtl ());
- enum vlmul_type vlmul = get_vlmul (insn->rtl ());
- uint8_t ratio = get_attr_ratio (insn->rtl ());
- /* when get_attr_ratio is invalid, this kind of instructions
- doesn't care about ratio. However, we still need this value
- in demand info backward analysis. */
- if (ratio == INVALID_ATTRIBUTE)
- ratio = calculate_ratio (sew, vlmul);
- bool ta = tail_agnostic_p (insn->rtl ());
- bool ma = mask_agnostic_p (insn->rtl ());
-
- /* If merge operand is undef value, we prefer agnostic. */
- int merge_op_idx = get_attr_merge_op_idx (insn->rtl ());
- if (merge_op_idx != INVALID_ATTRIBUTE
- && satisfies_constraint_vu (recog_data.operand[merge_op_idx]))
- {
- ta = true;
- ma = true;
- }
-
- vl_vtype_info info (avl_info (avl, set), sew, vlmul, ratio, ta, ma);
- return info;
-}
+ REAL_SET = 1 << 0,
+ PHI_SET = 1 << 1,
+ BB_HEAD_SET = 1 << 2,
+ BB_END_SET = 1 << 3,
+ /* ??? TODO: In RTL_SSA framework, we have REAL_SET,
+ PHI_SET, BB_HEAD_SET, BB_END_SET and
+ CLOBBER_DEF def_info types. Currently,
+ we conservatively do not optimize clobber
+ def since we don't see the case that we
+ need to optimize it. */
+ CLOBBER_DEF = 1 << 4
+};
-/* Change insn and Assert the change always happens. */
-static void
-validate_change_or_fail (rtx object, rtx *loc, rtx new_rtx, bool in_group)
+static bool
+insn_should_be_added_p (const insn_info *insn, unsigned int types)
{
- bool change_p = validate_change (object, loc, new_rtx, in_group);
- gcc_assert (change_p);
+ if (insn->is_real () && (types & REAL_SET))
+ return true;
+ if (insn->is_phi () && (types & PHI_SET))
+ return true;
+ if (insn->is_bb_head () && (types & BB_HEAD_SET))
+ return true;
+ if (insn->is_bb_end () && (types & BB_END_SET))
+ return true;
+ return false;
}
-static void
-change_insn (rtx_insn *rinsn, rtx new_pat)
+static const hash_set<use_info *>
+get_all_real_uses (insn_info *insn, unsigned regno)
{
- /* We don't apply change on RTL_SSA here since it's possible a
- new INSN we add in the PASS before which doesn't have RTL_SSA
- info yet.*/
- if (dump_file)
- {
- fprintf (dump_file, "\nChange PATTERN of insn %d from:\n",
- INSN_UID (rinsn));
- print_rtl_single (dump_file, PATTERN (rinsn));
- }
+ gcc_assert (insn->is_real ());
- validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, false);
+ hash_set<use_info *> uses;
+ auto_vec<phi_info *> work_list;
+ hash_set<phi_info *> visited_list;
- if (dump_file)
+ for (def_info *def : insn->defs ())
{
- fprintf (dump_file, "\nto:\n");
- print_rtl_single (dump_file, PATTERN (rinsn));
+ if (!def->is_reg () || def->regno () != regno)
+ continue;
+ set_info *set = safe_dyn_cast<set_info *> (def);
+ if (!set)
+ continue;
+ for (use_info *use : set->nondebug_insn_uses ())
+ if (use->insn ()->is_real ())
+ uses.add (use);
+ for (use_info *use : set->phi_uses ())
+ work_list.safe_push (use->phi ());
}
-}
-static const insn_info *
-get_forward_read_vl_insn (const insn_info *insn)
-{
- const bb_info *bb = insn->bb ();
- for (const insn_info *i = insn->next_nondebug_insn ();
- real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ())
+ while (!work_list.is_empty ())
{
- if (find_access (i->defs (), VL_REGNUM))
- return nullptr;
- if (read_vl_insn_p (i->rtl ()))
- return i;
- }
- return nullptr;
-}
+ phi_info *phi = work_list.pop ();
+ visited_list.add (phi);
-static const insn_info *
-get_backward_fault_first_load_insn (const insn_info *insn)
-{
- const bb_info *bb = insn->bb ();
- for (const insn_info *i = insn->prev_nondebug_insn ();
- real_insn_and_same_bb_p (i, bb); i = i->prev_nondebug_insn ())
- {
- if (fault_first_load_p (i->rtl ()))
- return i;
- if (find_access (i->defs (), VL_REGNUM))
- return nullptr;
+ for (use_info *use : phi->nondebug_insn_uses ())
+ if (use->insn ()->is_real ())
+ uses.add (use);
+ for (use_info *use : phi->phi_uses ())
+ if (!visited_list.contains (use->phi ()))
+ work_list.safe_push (use->phi ());
}
- return nullptr;
+ return uses;
}
-static bool
-change_insn (function_info *ssa, insn_change change, insn_info *insn,
- rtx new_pat)
+/* Transitively collect the sets feeding PHI, following nested phis. The
+ kinds of defining insn to collect are selected by TYPES (def_type bits). */
+static hash_set<set_info *>
+get_all_sets (phi_info *phi, unsigned int types)
{
- rtx_insn *rinsn = insn->rtl ();
- auto attempt = ssa->new_change_attempt ();
- if (!restrict_movement (change))
- return false;
+ hash_set<set_info *> insns;
+ auto_vec<phi_info *> work_list;
+ hash_set<phi_info *> visited_list;
+ if (!phi)
+ return hash_set<set_info *> ();
+ work_list.safe_push (phi);
- if (dump_file)
+ while (!work_list.is_empty ())
{
- fprintf (dump_file, "\nChange PATTERN of insn %d from:\n",
- INSN_UID (rinsn));
- print_rtl_single (dump_file, PATTERN (rinsn));
- }
-
- insn_change_watermark watermark;
- validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, true);
-
- /* These routines report failures themselves. */
- if (!recog (attempt, change) || !change_is_worthwhile (change, false))
- return false;
+ phi_info *phi = work_list.pop ();
+ visited_list.add (phi);
+ for (use_info *use : phi->inputs ())
+ {
+ def_info *def = use->def ();
+ set_info *set = safe_dyn_cast<set_info *> (def);
+ if (!set)
+ return hash_set<set_info *> ();
- /* Fix bug:
- (insn 12 34 13 2 (set (reg:RVVM4DI 120 v24 [orig:134 _1 ] [134])
- (if_then_else:RVVM4DI (unspec:RVVMF8BI [
- (const_vector:RVVMF8BI repeat [
- (const_int 1 [0x1])
- ])
- (const_int 0 [0])
- (const_int 2 [0x2]) repeated x2
- (const_int 0 [0])
- (reg:SI 66 vl)
- (reg:SI 67 vtype)
- ] UNSPEC_VPREDICATE)
- (plus:RVVM4DI (reg/v:RVVM4DI 104 v8 [orig:137 op1 ] [137])
- (sign_extend:RVVM4DI (vec_duplicate:RVVM4SI (reg:SI 15 a5
- [140])))) (unspec:RVVM4DI [ (const_int 0 [0]) ] UNSPEC_VUNDEF)))
- "rvv.c":8:12 2784 {pred_single_widen_addsvnx8di_scalar} (expr_list:REG_EQUIV
- (mem/c:RVVM4DI (reg:DI 10 a0 [142]) [1 <retval>+0 S[64, 64] A128])
- (expr_list:REG_EQUAL (if_then_else:RVVM4DI (unspec:RVVMF8BI [
- (const_vector:RVVMF8BI repeat [
- (const_int 1 [0x1])
- ])
- (reg/v:DI 13 a3 [orig:139 vl ] [139])
- (const_int 2 [0x2]) repeated x2
- (const_int 0 [0])
- (reg:SI 66 vl)
- (reg:SI 67 vtype)
- ] UNSPEC_VPREDICATE)
- (plus:RVVM4DI (reg/v:RVVM4DI 104 v8 [orig:137 op1 ] [137])
- (const_vector:RVVM4DI repeat [
- (const_int 2730 [0xaaa])
- ]))
- (unspec:RVVM4DI [
- (const_int 0 [0])
- ] UNSPEC_VUNDEF))
- (nil))))
- Here we want to remove use "a3". However, the REG_EQUAL/REG_EQUIV note use
- "a3" which made us fail in change_insn. We reference to the
- 'aarch64-cc-fusion.cc' and add this method. */
- remove_reg_equal_equiv_notes (rinsn);
- confirm_change_group ();
- ssa->change_insn (change);
+ gcc_assert (!set->insn ()->is_debug_insn ());
- if (dump_file)
- {
- fprintf (dump_file, "\nto:\n");
- print_rtl_single (dump_file, PATTERN (rinsn));
+ if (insn_should_be_added_p (set->insn (), types))
+ insns.add (set);
+ if (set->insn ()->is_phi ())
+ {
+ phi_info *new_phi = as_a<phi_info *> (set);
+ if (!visited_list.contains (new_phi))
+ work_list.safe_push (new_phi);
+ }
+ }
}
- return true;
+ return insns;
}
-static void
-change_vsetvl_insn (const insn_info *insn, const vector_insn_info &info,
- rtx vl = NULL_RTX)
+static hash_set<set_info *>
+get_all_sets (set_info *set, bool /* get_real_inst */ real_p,
+ bool /*get_phi*/ phi_p, bool /* get_function_parameter*/ param_p)
{
- rtx_insn *rinsn;
- if (vector_config_insn_p (insn->rtl ()))
- {
- rinsn = insn->rtl ();
- gcc_assert (vsetvl_insn_p (rinsn) && "Can't handle X0, rs1 vsetvli yet");
- }
- else
- {
- gcc_assert (has_vtype_op (insn->rtl ()));
- rinsn = PREV_INSN (insn->rtl ());
- gcc_assert (vector_config_insn_p (rinsn));
- }
- rtx new_pat = gen_vsetvl_pat (rinsn, info, vl);
- change_insn (rinsn, new_pat);
-}
+ if (real_p && phi_p && param_p)
+ return get_all_sets (safe_dyn_cast<phi_info *> (set),
+ REAL_SET | PHI_SET | BB_HEAD_SET | BB_END_SET);
-static bool
-avl_source_has_vsetvl_p (set_info *avl_source)
-{
- if (!avl_source)
- return false;
- if (!avl_source->insn ())
- return false;
- if (avl_source->insn ()->is_real ())
- return vsetvl_insn_p (avl_source->insn ()->rtl ());
- hash_set<set_info *> sets = get_all_sets (avl_source, true, false, true);
- for (const auto set : sets)
- {
- if (set->insn ()->is_real () && vsetvl_insn_p (set->insn ()->rtl ()))
- return true;
- }
- return false;
+ else if (real_p && param_p)
+ return get_all_sets (safe_dyn_cast<phi_info *> (set),
+ REAL_SET | BB_HEAD_SET | BB_END_SET);
+
+ else if (real_p)
+ return get_all_sets (safe_dyn_cast<phi_info *> (set), REAL_SET);
+ return hash_set<set_info *> ();
}
static bool
@@ -959,93 +631,14 @@ source_equal_p (insn_info *insn1, insn_info *insn2)
rtx_insn *rinsn2 = insn2->rtl ();
if (!rinsn1 || !rinsn2)
return false;
+
rtx note1 = find_reg_equal_equiv_note (rinsn1);
rtx note2 = find_reg_equal_equiv_note (rinsn2);
- rtx single_set1 = single_set (rinsn1);
- rtx single_set2 = single_set (rinsn2);
- if (read_vl_insn_p (rinsn1) && read_vl_insn_p (rinsn2))
- {
- const insn_info *load1 = get_backward_fault_first_load_insn (insn1);
- const insn_info *load2 = get_backward_fault_first_load_insn (insn2);
- return load1 && load2 && load1 == load2;
- }
-
if (note1 && note2 && rtx_equal_p (note1, note2))
return true;
-
- /* Since vsetvl instruction is not single SET.
- We handle this case specially here. */
- if (vsetvl_insn_p (insn1->rtl ()) && vsetvl_insn_p (insn2->rtl ()))
- {
- /* For example:
- vsetvl1 a6,a5,e32m1
- RVV 1 (use a6 as AVL)
- vsetvl2 a5,a5,e8mf4
- RVV 2 (use a5 as AVL)
- We consider AVL of RVV 1 and RVV 2 are same so that we can
- gain more optimization opportunities.
-
- Note: insn1_info.compatible_avl_p (insn2_info)
- will make sure there is no instruction between vsetvl1 and vsetvl2
- modify a5 since their def will be different if there is instruction
- modify a5 and compatible_avl_p will return false. */
- vector_insn_info insn1_info, insn2_info;
- insn1_info.parse_insn (insn1);
- insn2_info.parse_insn (insn2);
-
- /* To avoid dead loop, we don't optimize a vsetvli def has vsetvli
- instructions which will complicate the situation. */
- if (avl_source_has_vsetvl_p (insn1_info.get_avl_source ())
- || avl_source_has_vsetvl_p (insn2_info.get_avl_source ()))
- return false;
-
- if (insn1_info.same_vlmax_p (insn2_info)
- && insn1_info.compatible_avl_p (insn2_info))
- return true;
- }
-
- /* We only handle AVL is set by instructions with no side effects. */
- if (!single_set1 || !single_set2)
- return false;
- if (!rtx_equal_p (SET_SRC (single_set1), SET_SRC (single_set2)))
- return false;
- /* RTL_SSA uses include REG_NOTE. Consider this following case:
-
- insn1 RTL:
- (insn 41 39 42 4 (set (reg:DI 26 s10 [orig:159 loop_len_46 ] [159])
- (umin:DI (reg:DI 15 a5 [orig:201 _149 ] [201])
- (reg:DI 14 a4 [276]))) 408 {*umindi3}
- (expr_list:REG_EQUAL (umin:DI (reg:DI 15 a5 [orig:201 _149 ] [201])
- (const_int 2 [0x2]))
- (nil)))
- The RTL_SSA uses of this instruction has 2 uses:
- 1. (reg:DI 15 a5 [orig:201 _149 ] [201]) - twice.
- 2. (reg:DI 14 a4 [276]) - once.
-
- insn2 RTL:
- (insn 38 353 351 4 (set (reg:DI 27 s11 [orig:160 loop_len_47 ] [160])
- (umin:DI (reg:DI 15 a5 [orig:199 _146 ] [199])
- (reg:DI 14 a4 [276]))) 408 {*umindi3}
- (expr_list:REG_EQUAL (umin:DI (reg:DI 28 t3 [orig:200 ivtmp_147 ] [200])
- (const_int 2 [0x2]))
- (nil)))
- The RTL_SSA uses of this instruction has 3 uses:
- 1. (reg:DI 15 a5 [orig:199 _146 ] [199]) - once
- 2. (reg:DI 14 a4 [276]) - once
- 3. (reg:DI 28 t3 [orig:200 ivtmp_147 ] [200]) - once
-
- Return false when insn1->uses ().size () != insn2->uses ().size ()
- */
- if (insn1->uses ().size () != insn2->uses ().size ())
- return false;
- for (size_t i = 0; i < insn1->uses ().size (); i++)
- if (insn1->uses ()[i] != insn2->uses ()[i])
- return false;
- return true;
+ return false;
}
-/* Helper function to get single same real RTL source.
- return NULL if it is not a single real RTL source. */
static insn_info *
extract_single_source (set_info *set)
{
@@ -1066,7 +659,7 @@ extract_single_source (set_info *set)
NULL so that VSETVL PASS will insert vsetvl directly. */
if (set->insn ()->is_artificial ())
return nullptr;
- if (!source_equal_p (set->insn (), first_insn))
+ if (set != *sets.begin () && !source_equal_p (set->insn (), first_insn))
return nullptr;
}
@@ -1074,3115 +667,2825 @@ extract_single_source (set_info *set)
}
static unsigned
-calculate_sew (vlmul_type vlmul, unsigned int ratio)
+get_expr_id (unsigned bb_index, unsigned regno, unsigned num_bbs)
{
- for (const unsigned sew : ALL_SEW)
- if (calculate_ratio (sew, vlmul) == ratio)
- return sew;
- return 0;
+ return regno * num_bbs + bb_index;
}
-
-static vlmul_type
-calculate_vlmul (unsigned int sew, unsigned int ratio)
+static unsigned
+get_regno (unsigned expr_id, unsigned num_bb)
{
- for (const vlmul_type vlmul : ALL_LMUL)
- if (calculate_ratio (sew, vlmul) == ratio)
- return vlmul;
- return LMUL_RESERVED;
+ return expr_id / num_bb;
}
+static unsigned
+get_bb_index (unsigned expr_id, unsigned num_bb)
+{
+ return expr_id % num_bb;
+}
+
+/* These flags indicate the minimum demand of the vl and vtype values by the
+ RVV instruction. For example, DEMAND_RATIO_P indicates that this RVV
+ instruction only needs the SEW/LMUL ratio to remain the same, and does not
+ require SEW and LMUL to be fixed.
+ Therefore, if the former RVV instruction needs DEMAND_RATIO_P and the latter
+ instruction needs DEMAND_SEW_LMUL_P and its SEW/LMUL is the same as that of
+ the former instruction, then we can make the minimum demand of the former
+ instruction strict to DEMAND_SEW_LMUL_P, and its required SEW and LMUL are
+ the SEW and LMUL of the latter instruction, and the vsetvl instruction
+ generated according to the new demand can also be used for the latter
+ instruction, so there is no need to insert a separate vsetvl instruction for
+ the latter instruction. */
+enum demand_flags : unsigned
+{
+ DEMAND_EMPTY_P = 0,
+ DEMAND_SEW_P = 1 << 0,
+ DEMAND_LMUL_P = 1 << 1,
+ DEMAND_RATIO_P = 1 << 2,
+ DEMAND_GE_SEW_P = 1 << 3,
+ DEMAND_TAIL_POLICY_P = 1 << 4,
+ DEMAND_MASK_POLICY_P = 1 << 5,
+ DEMAND_AVL_P = 1 << 6,
+ DEMAND_NON_ZERO_AVL_P = 1 << 7,
+};
-static bool
-incompatible_avl_p (const vector_insn_info &info1,
- const vector_insn_info &info2)
-{
- return !info1.compatible_avl_p (info2) && !info2.compatible_avl_p (info1);
-}
+/* We split the demand information into three parts. They are sew and lmul
+ related (sew_lmul_demand_type), tail and mask policy related
+ (policy_demand_type) and avl related (avl_demand_type). Then we define three
+ interfaces available_with, compatible_with and merge_with. available_with is
+ used to determine whether the two vsetvl infos prev_info and next_info are
+ available or not. If prev_info is available for next_info, it means that the
+ RVV insn corresponding to next_info on the path from prev_info to next_info
+ can be used without inserting a separate vsetvl instruction. compatible_with
+ is used to determine whether prev_info is compatible with next_info, and if
+ so, merge_with can be used to merge the stricter demand information from
+ next_info into prev_info so that prev_info becomes available to next_info.
+ */
-static bool
-different_sew_p (const vector_insn_info &info1, const vector_insn_info &info2)
+enum class sew_lmul_demand_type : unsigned
{
- return info1.get_sew () != info2.get_sew ();
-}
+ sew_lmul = demand_flags::DEMAND_SEW_P | demand_flags::DEMAND_LMUL_P,
+ ratio_only = demand_flags::DEMAND_RATIO_P,
+ sew_only = demand_flags::DEMAND_SEW_P,
+ ge_sew = demand_flags::DEMAND_GE_SEW_P,
+ ratio_and_ge_sew
+ = demand_flags::DEMAND_RATIO_P | demand_flags::DEMAND_GE_SEW_P,
+};
-static bool
-different_lmul_p (const vector_insn_info &info1, const vector_insn_info &info2)
+enum class policy_demand_type : unsigned
{
- return info1.get_vlmul () != info2.get_vlmul ();
-}
+ tail_mask_policy
+ = demand_flags::DEMAND_TAIL_POLICY_P | demand_flags::DEMAND_MASK_POLICY_P,
+ tail_policy_only = demand_flags::DEMAND_TAIL_POLICY_P,
+ mask_policy_only = demand_flags::DEMAND_MASK_POLICY_P,
+ ignore_policy = demand_flags::DEMAND_EMPTY_P,
+};
-static bool
-different_ratio_p (const vector_insn_info &info1, const vector_insn_info &info2)
+enum class avl_demand_type : unsigned
{
- return info1.get_ratio () != info2.get_ratio ();
-}
+ avl = demand_flags::DEMAND_AVL_P,
+ non_zero_avl = demand_flags::DEMAND_NON_ZERO_AVL_P,
+ ignore_avl = demand_flags::DEMAND_EMPTY_P,
+};
-static bool
-different_tail_policy_p (const vector_insn_info &info1,
- const vector_insn_info &info2)
-{
- return info1.get_ta () != info2.get_ta ();
-}
-static bool
-different_mask_policy_p (const vector_insn_info &info1,
- const vector_insn_info &info2)
+class vsetvl_info
{
- return info1.get_ma () != info2.get_ma ();
-}
+private:
+ insn_info *m_insn;
+ bb_info *m_bb;
+ rtx m_avl;
+ rtx m_vl;
+ set_info *m_avl_def;
+ uint8_t m_sew;
+ uint8_t m_max_sew;
+ vlmul_type m_vlmul;
+ uint8_t m_ratio;
+ bool m_ta;
+ bool m_ma;
+
+ sew_lmul_demand_type m_sew_lmul_demand;
+ policy_demand_type m_policy_demand;
+ avl_demand_type m_avl_demand;
+
+ enum class state_type
+ {
+ UNINITIALIZED,
+ VALID,
+ UNKNOWN,
+ EMPTY,
+ };
+ state_type m_state;
+
+ bool m_ignore;
+ bool change_vtype_only;
+ insn_info *m_read_vl_insn;
+ bool use_by_non_rvv_insn;
-static bool
-possible_zero_avl_p (const vector_insn_info &info1,
- const vector_insn_info &info2)
-{
- return !info1.has_non_zero_avl () || !info2.has_non_zero_avl ();
-}
-
-static bool
-second_ratio_invalid_for_first_sew_p (const vector_insn_info &info1,
- const vector_insn_info &info2)
-{
- return calculate_vlmul (info1.get_sew (), info2.get_ratio ())
- == LMUL_RESERVED;
-}
-
-static bool
-second_ratio_invalid_for_first_lmul_p (const vector_insn_info &info1,
- const vector_insn_info &info2)
-{
- return calculate_sew (info1.get_vlmul (), info2.get_ratio ()) == 0;
-}
-
-static bool
-float_insn_valid_sew_p (const vector_insn_info &info, unsigned int sew)
-{
- if (info.get_insn () && info.get_insn ()->is_real ()
- && get_attr_type (info.get_insn ()->rtl ()) == TYPE_VFMOVFV)
- {
- if (sew == 16)
- return TARGET_VECTOR_ELEN_FP_16;
- else if (sew == 32)
- return TARGET_VECTOR_ELEN_FP_32;
- else if (sew == 64)
- return TARGET_VECTOR_ELEN_FP_64;
- }
- return true;
-}
-
-static bool
-second_sew_less_than_first_sew_p (const vector_insn_info &info1,
- const vector_insn_info &info2)
-{
- return info2.get_sew () < info1.get_sew ()
- || !float_insn_valid_sew_p (info1, info2.get_sew ());
-}
-
-static bool
-first_sew_less_than_second_sew_p (const vector_insn_info &info1,
- const vector_insn_info &info2)
-{
- return info1.get_sew () < info2.get_sew ()
- || !float_insn_valid_sew_p (info2, info1.get_sew ());
-}
-
-/* return 0 if LMUL1 == LMUL2.
- return -1 if LMUL1 < LMUL2.
- return 1 if LMUL1 > LMUL2. */
-static int
-compare_lmul (vlmul_type vlmul1, vlmul_type vlmul2)
-{
- if (vlmul1 == vlmul2)
- return 0;
-
- switch (vlmul1)
- {
- case LMUL_1:
- if (vlmul2 == LMUL_2 || vlmul2 == LMUL_4 || vlmul2 == LMUL_8)
- return 1;
- else
- return -1;
- case LMUL_2:
- if (vlmul2 == LMUL_4 || vlmul2 == LMUL_8)
- return 1;
- else
- return -1;
- case LMUL_4:
- if (vlmul2 == LMUL_8)
- return 1;
- else
- return -1;
- case LMUL_8:
- return -1;
- case LMUL_F2:
- if (vlmul2 == LMUL_1 || vlmul2 == LMUL_2 || vlmul2 == LMUL_4
- || vlmul2 == LMUL_8)
- return 1;
- else
- return -1;
- case LMUL_F4:
- if (vlmul2 == LMUL_F2 || vlmul2 == LMUL_1 || vlmul2 == LMUL_2
- || vlmul2 == LMUL_4 || vlmul2 == LMUL_8)
- return 1;
- else
- return -1;
- case LMUL_F8:
- return 0;
- default:
- gcc_unreachable ();
- }
-}
-
-static bool
-second_lmul_less_than_first_lmul_p (const vector_insn_info &info1,
- const vector_insn_info &info2)
-{
- return compare_lmul (info2.get_vlmul (), info1.get_vlmul ()) == -1;
-}
-
-static bool
-second_ratio_less_than_first_ratio_p (const vector_insn_info &info1,
- const vector_insn_info &info2)
-{
- return info2.get_ratio () < info1.get_ratio ();
-}
-
-static CONSTEXPR const demands_cond incompatible_conds[] = {
-#define DEF_INCOMPATIBLE_COND(AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, \
- GE_SEW1, TAIL_POLICTY1, MASK_POLICY1, AVL2, \
- SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, \
- TAIL_POLICTY2, MASK_POLICY2, COND) \
- {{{AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, TAIL_POLICTY1, \
- MASK_POLICY1}, \
- {AVL2, SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2, \
- MASK_POLICY2}}, \
- COND},
-#include "riscv-vsetvl.def"
-};
-
-static unsigned
-greatest_sew (const vector_insn_info &info1, const vector_insn_info &info2)
-{
- return std::max (info1.get_sew (), info2.get_sew ());
-}
-
-static unsigned
-first_sew (const vector_insn_info &info1, const vector_insn_info &)
-{
- return info1.get_sew ();
-}
-
-static unsigned
-second_sew (const vector_insn_info &, const vector_insn_info &info2)
-{
- return info2.get_sew ();
-}
-
-static vlmul_type
-first_vlmul (const vector_insn_info &info1, const vector_insn_info &)
-{
- return info1.get_vlmul ();
-}
-
-static vlmul_type
-second_vlmul (const vector_insn_info &, const vector_insn_info &info2)
-{
- return info2.get_vlmul ();
-}
-
-static unsigned
-first_ratio (const vector_insn_info &info1, const vector_insn_info &)
-{
- return info1.get_ratio ();
-}
-
-static unsigned
-second_ratio (const vector_insn_info &, const vector_insn_info &info2)
-{
- return info2.get_ratio ();
-}
-
-static vlmul_type
-vlmul_for_first_sew_second_ratio (const vector_insn_info &info1,
- const vector_insn_info &info2)
-{
- return calculate_vlmul (info1.get_sew (), info2.get_ratio ());
-}
-
-static vlmul_type
-vlmul_for_greatest_sew_second_ratio (const vector_insn_info &info1,
- const vector_insn_info &info2)
-{
- return calculate_vlmul (MAX (info1.get_sew (), info2.get_sew ()),
- info2.get_ratio ());
-}
-
-static unsigned
-ratio_for_second_sew_first_vlmul (const vector_insn_info &info1,
- const vector_insn_info &info2)
-{
- return calculate_ratio (info2.get_sew (), info1.get_vlmul ());
-}
-
-static CONSTEXPR const demands_fuse_rule fuse_rules[] = {
-#define DEF_SEW_LMUL_FUSE_RULE(DEMAND_SEW1, DEMAND_LMUL1, DEMAND_RATIO1, \
- DEMAND_GE_SEW1, DEMAND_SEW2, DEMAND_LMUL2, \
- DEMAND_RATIO2, DEMAND_GE_SEW2, NEW_DEMAND_SEW, \
- NEW_DEMAND_LMUL, NEW_DEMAND_RATIO, \
- NEW_DEMAND_GE_SEW, NEW_SEW, NEW_VLMUL, \
- NEW_RATIO) \
- {{{DEMAND_ANY, DEMAND_SEW1, DEMAND_LMUL1, DEMAND_RATIO1, DEMAND_ANY, \
- DEMAND_GE_SEW1, DEMAND_ANY, DEMAND_ANY}, \
- {DEMAND_ANY, DEMAND_SEW2, DEMAND_LMUL2, DEMAND_RATIO2, DEMAND_ANY, \
- DEMAND_GE_SEW2, DEMAND_ANY, DEMAND_ANY}}, \
- NEW_DEMAND_SEW, \
- NEW_DEMAND_LMUL, \
- NEW_DEMAND_RATIO, \
- NEW_DEMAND_GE_SEW, \
- NEW_SEW, \
- NEW_VLMUL, \
- NEW_RATIO},
-#include "riscv-vsetvl.def"
-};
-
-static bool
-always_unavailable (const vector_insn_info &, const vector_insn_info &)
-{
- return true;
-}
-
-static bool
-avl_unavailable_p (const vector_insn_info &info1, const vector_insn_info &info2)
-{
- return !info2.compatible_avl_p (info1.get_avl_info ());
-}
-
-static bool
-sew_unavailable_p (const vector_insn_info &info1, const vector_insn_info &info2)
-{
- if (!info2.demand_p (DEMAND_LMUL) && !info2.demand_p (DEMAND_RATIO))
- {
- if (info2.demand_p (DEMAND_GE_SEW))
- return info1.get_sew () < info2.get_sew ();
- return info1.get_sew () != info2.get_sew ();
- }
- return true;
-}
-
-static bool
-lmul_unavailable_p (const vector_insn_info &info1,
- const vector_insn_info &info2)
-{
- if (info1.get_vlmul () == info2.get_vlmul () && !info2.demand_p (DEMAND_SEW)
- && !info2.demand_p (DEMAND_RATIO))
- return false;
- return true;
-}
-
-static bool
-ge_sew_unavailable_p (const vector_insn_info &info1,
- const vector_insn_info &info2)
-{
- if (!info2.demand_p (DEMAND_LMUL) && !info2.demand_p (DEMAND_RATIO)
- && info2.demand_p (DEMAND_GE_SEW))
- return info1.get_sew () < info2.get_sew ();
- return true;
-}
-
-static bool
-ge_sew_lmul_unavailable_p (const vector_insn_info &info1,
- const vector_insn_info &info2)
-{
- if (!info2.demand_p (DEMAND_RATIO) && info2.demand_p (DEMAND_GE_SEW))
- return info1.get_sew () < info2.get_sew ();
- return true;
-}
-
-static bool
-ge_sew_ratio_unavailable_p (const vector_insn_info &info1,
- const vector_insn_info &info2)
-{
- if (!info2.demand_p (DEMAND_LMUL))
- {
- if (info2.demand_p (DEMAND_GE_SEW))
- return info1.get_sew () < info2.get_sew ();
- /* Demand GE_SEW should be available for non-demand SEW. */
- else if (!info2.demand_p (DEMAND_SEW))
- return false;
- }
- return true;
-}
-
-static CONSTEXPR const demands_cond unavailable_conds[] = {
-#define DEF_UNAVAILABLE_COND(AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, \
- TAIL_POLICTY1, MASK_POLICY1, AVL2, SEW2, LMUL2, \
- RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2, \
- MASK_POLICY2, COND) \
- {{{AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, TAIL_POLICTY1, \
- MASK_POLICY1}, \
- {AVL2, SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2, \
- MASK_POLICY2}}, \
- COND},
-#include "riscv-vsetvl.def"
-};
-
-static bool
-same_sew_lmul_demand_p (const bool *dems1, const bool *dems2)
-{
- return dems1[DEMAND_SEW] == dems2[DEMAND_SEW]
- && dems1[DEMAND_LMUL] == dems2[DEMAND_LMUL]
- && dems1[DEMAND_RATIO] == dems2[DEMAND_RATIO] && !dems1[DEMAND_GE_SEW]
- && !dems2[DEMAND_GE_SEW];
-}
-
-static bool
-propagate_avl_across_demands_p (const vector_insn_info &info1,
- const vector_insn_info &info2)
-{
- if (info2.demand_p (DEMAND_AVL))
- {
- if (info2.demand_p (DEMAND_NONZERO_AVL))
- return info1.demand_p (DEMAND_AVL)
- && !info1.demand_p (DEMAND_NONZERO_AVL) && info1.has_avl_reg ();
- }
- else
- return info1.demand_p (DEMAND_AVL) && info1.has_avl_reg ();
- return false;
-}
-
-static bool
-reg_available_p (const insn_info *insn, const vector_insn_info &info)
-{
- if (info.has_avl_reg () && !info.get_avl_source ())
- return false;
- insn_info *def_insn = info.get_avl_source ()->insn ();
- if (def_insn->bb () == insn->bb ())
- return before_p (def_insn, insn);
- else
- return dominated_by_p (CDI_DOMINATORS, insn->bb ()->cfg_bb (),
- def_insn->bb ()->cfg_bb ());
-}
-
-/* Return true if the instruction support relaxed compatible check. */
-static bool
-support_relaxed_compatible_p (const vector_insn_info &info1,
- const vector_insn_info &info2)
-{
- if (fault_first_load_p (info1.get_insn ()->rtl ())
- && info2.demand_p (DEMAND_AVL) && info2.has_avl_reg ()
- && info2.get_avl_source () && info2.get_avl_source ()->insn ()->is_phi ())
- {
- hash_set<set_info *> sets
- = get_all_sets (info2.get_avl_source (), true, false, false);
- for (set_info *set : sets)
- {
- if (read_vl_insn_p (set->insn ()->rtl ()))
- {
- const insn_info *insn
- = get_backward_fault_first_load_insn (set->insn ());
- if (insn == info1.get_insn ())
- return info2.compatible_vtype_p (info1);
- }
- }
- }
- return false;
-}
-
-/* Count the number of REGNO in RINSN. */
-static int
-count_regno_occurrences (rtx_insn *rinsn, unsigned int regno)
-{
- int count = 0;
- extract_insn (rinsn);
- for (int i = 0; i < recog_data.n_operands; i++)
- if (refers_to_regno_p (regno, recog_data.operand[i]))
- count++;
- return count;
-}
-
-/* Return TRUE if the demands can be fused. */
-static bool
-demands_can_be_fused_p (const vector_insn_info &be_fused,
- const vector_insn_info &to_fuse)
-{
- return be_fused.compatible_p (to_fuse) && !be_fused.available_p (to_fuse);
-}
-
-/* Return true if we can fuse VSETVL demand info into predecessor of earliest
- * edge. */
-static bool
-earliest_pred_can_be_fused_p (const bb_info *earliest_pred,
- const vector_insn_info &earliest_info,
- const vector_insn_info &expr, rtx *vlmax_vl)
-{
- /* Backward VLMAX VL:
- bb 3:
- vsetivli zero, 1 ... -> vsetvli t1, zero
- vmv.s.x
- bb 5:
- vsetvli t1, zero ... -> to be elided.
- vlse16.v
-
- We should forward "t1". */
- if (!earliest_info.has_avl_reg () && expr.has_avl_reg ())
- {
- rtx avl_or_vl_reg = expr.get_avl_or_vl_reg ();
- gcc_assert (avl_or_vl_reg);
- const insn_info *last_insn = earliest_info.get_insn ();
- /* To fuse demand on earlest edge, we make sure AVL/VL
- didn't change from the consume insn to the predecessor
- of the edge. */
- for (insn_info *i = earliest_pred->end_insn ()->prev_nondebug_insn ();
- real_insn_and_same_bb_p (i, earliest_pred)
- && after_or_same_p (i, last_insn);
- i = i->prev_nondebug_insn ())
- {
- if (find_access (i->defs (), REGNO (avl_or_vl_reg)))
- return false;
- if (find_access (i->uses (), REGNO (avl_or_vl_reg)))
- return false;
- }
- if (vlmax_vl && vlmax_avl_p (expr.get_avl ()))
- *vlmax_vl = avl_or_vl_reg;
- }
-
- return true;
-}
-
-/* Return true if the current VSETVL 1 is dominated by preceding VSETVL 2.
-
- VSETVL 2 dominates VSETVL 1 should satisfy this following check:
-
- - VSETVL 2 should have the RATIO (SEW/LMUL) with VSETVL 1.
- - VSETVL 2 is user vsetvl (vsetvl VL, AVL)
- - VSETVL 2 "VL" result is the "AVL" of VSETL1. */
-static bool
-vsetvl_dominated_by_p (const basic_block cfg_bb,
- const vector_insn_info &vsetvl1,
- const vector_insn_info &vsetvl2, bool fuse_p)
-{
- if (!vsetvl1.valid_or_dirty_p () || !vsetvl2.valid_or_dirty_p ())
- return false;
- if (!has_vl_op (vsetvl1.get_insn ()->rtl ())
- || !vsetvl_insn_p (vsetvl2.get_insn ()->rtl ()))
- return false;
-
- hash_set<set_info *> sets
- = get_all_sets (vsetvl1.get_avl_source (), true, false, false);
- set_info *set = get_same_bb_set (sets, cfg_bb);
-
- if (!vsetvl1.has_avl_reg () || vlmax_avl_p (vsetvl1.get_avl ())
- || !vsetvl2.same_vlmax_p (vsetvl1) || !set
- || set->insn () != vsetvl2.get_insn ())
- return false;
-
- if (fuse_p && vsetvl2.same_vtype_p (vsetvl1))
- return false;
- else if (!fuse_p && !vsetvl2.same_vtype_p (vsetvl1))
- return false;
- return true;
-}
-
-avl_info::avl_info (const avl_info &other)
-{
- m_value = other.get_value ();
- m_source = other.get_source ();
-}
-
-avl_info::avl_info (rtx value_in, set_info *source_in)
- : m_value (value_in), m_source (source_in)
-{}
-
-bool
-avl_info::single_source_equal_p (const avl_info &other) const
-{
- set_info *set1 = m_source;
- set_info *set2 = other.get_source ();
- insn_info *insn1 = extract_single_source (set1);
- insn_info *insn2 = extract_single_source (set2);
- if (!insn1 || !insn2)
- return false;
- return source_equal_p (insn1, insn2);
-}
-
-bool
-avl_info::multiple_source_equal_p (const avl_info &other) const
-{
- /* When the def info is same in RTL_SSA namespace, it's safe
- to consider they are avl compatible. */
- if (m_source == other.get_source ())
- return true;
-
- /* We only consider handle PHI node. */
- if (!m_source->insn ()->is_phi () || !other.get_source ()->insn ()->is_phi ())
- return false;
-
- phi_info *phi1 = as_a<phi_info *> (m_source);
- phi_info *phi2 = as_a<phi_info *> (other.get_source ());
-
- if (phi1->is_degenerate () && phi2->is_degenerate ())
- {
- /* Degenerate PHI means the PHI node only have one input. */
-
- /* If both PHI nodes have the same single input in use list.
- We consider they are AVL compatible. */
- if (phi1->input_value (0) == phi2->input_value (0))
- return true;
- }
- /* TODO: We can support more optimization cases in the future. */
- return false;
-}
-
-avl_info &
-avl_info::operator= (const avl_info &other)
-{
- m_value = other.get_value ();
- m_source = other.get_source ();
- return *this;
-}
-
-bool
-avl_info::operator== (const avl_info &other) const
-{
- if (!m_value)
- return !other.get_value ();
- if (!other.get_value ())
- return false;
-
- if (GET_CODE (m_value) != GET_CODE (other.get_value ()))
- return false;
-
- /* Handle CONST_INT AVL. */
- if (CONST_INT_P (m_value))
- return INTVAL (m_value) == INTVAL (other.get_value ());
-
- /* Handle VLMAX AVL. */
- if (vlmax_avl_p (m_value))
- return vlmax_avl_p (other.get_value ());
- if (vlmax_avl_p (other.get_value ()))
- return false;
-
- /* If any source is undef value, we think they are not equal. */
- if (!m_source || !other.get_source ())
- return false;
-
- /* If both sources are single source (defined by a single real RTL)
- and their definitions are same. */
- if (single_source_equal_p (other))
- return true;
-
- return multiple_source_equal_p (other);
-}
-
-bool
-avl_info::operator!= (const avl_info &other) const
-{
- return !(*this == other);
-}
-
-bool
-avl_info::has_non_zero_avl () const
-{
- if (has_avl_imm ())
- return INTVAL (get_value ()) > 0;
- if (has_avl_reg ())
- return vlmax_avl_p (get_value ());
- return false;
-}
-
-/* Initialize VL/VTYPE information. */
-vl_vtype_info::vl_vtype_info (avl_info avl_in, uint8_t sew_in,
- enum vlmul_type vlmul_in, uint8_t ratio_in,
- bool ta_in, bool ma_in)
- : m_avl (avl_in), m_sew (sew_in), m_vlmul (vlmul_in), m_ratio (ratio_in),
- m_ta (ta_in), m_ma (ma_in)
-{
- gcc_assert (valid_sew_p (m_sew) && "Unexpected SEW");
-}
-
-bool
-vl_vtype_info::operator== (const vl_vtype_info &other) const
-{
- return same_avl_p (other) && m_sew == other.get_sew ()
- && m_vlmul == other.get_vlmul () && m_ta == other.get_ta ()
- && m_ma == other.get_ma () && m_ratio == other.get_ratio ();
-}
-
-bool
-vl_vtype_info::operator!= (const vl_vtype_info &other) const
-{
- return !(*this == other);
-}
-
-bool
-vl_vtype_info::same_avl_p (const vl_vtype_info &other) const
-{
- /* We need to compare both RTL and SET. If both AVL are CONST_INT.
- For example, const_int 3 and const_int 4, we need to compare
- RTL. If both AVL are REG and their REGNO are same, we need to
- compare SET. */
- return get_avl () == other.get_avl ()
- && get_avl_source () == other.get_avl_source ();
-}
-
-bool
-vl_vtype_info::same_vtype_p (const vl_vtype_info &other) const
-{
- return get_sew () == other.get_sew () && get_vlmul () == other.get_vlmul ()
- && get_ta () == other.get_ta () && get_ma () == other.get_ma ();
-}
-
-bool
-vl_vtype_info::same_vlmax_p (const vl_vtype_info &other) const
-{
- return get_ratio () == other.get_ratio ();
-}
-
-/* Compare the compatibility between Dem1 and Dem2.
- If Dem1 > Dem2, Dem1 has bigger compatibility then Dem2
- meaning Dem1 is easier be compatible with others than Dem2
- or Dem2 is stricter than Dem1.
- For example, Dem1 (demand SEW + LMUL) > Dem2 (demand RATIO). */
-bool
-vector_insn_info::operator>= (const vector_insn_info &other) const
-{
- if (support_relaxed_compatible_p (*this, other))
- {
- unsigned array_size = sizeof (unavailable_conds) / sizeof (demands_cond);
- /* Bypass AVL unavailable cases. */
- for (unsigned i = 2; i < array_size; i++)
- if (unavailable_conds[i].pair.match_cond_p (this->get_demands (),
- other.get_demands ())
- && unavailable_conds[i].incompatible_p (*this, other))
- return false;
- return true;
- }
-
- if (!other.compatible_p (static_cast<const vl_vtype_info &> (*this)))
- return false;
- if (!this->compatible_p (static_cast<const vl_vtype_info &> (other)))
- return true;
-
- if (*this == other)
- return true;
-
- for (const auto &cond : unavailable_conds)
- if (cond.pair.match_cond_p (this->get_demands (), other.get_demands ())
- && cond.incompatible_p (*this, other))
- return false;
-
- return true;
-}
-
-bool
-vector_insn_info::operator== (const vector_insn_info &other) const
-{
- gcc_assert (!uninit_p () && !other.uninit_p ()
- && "Uninitialization should not happen");
-
- /* Empty is only equal to another Empty. */
- if (empty_p ())
- return other.empty_p ();
- if (other.empty_p ())
- return empty_p ();
-
- /* Unknown is only equal to another Unknown. */
- if (unknown_p ())
- return other.unknown_p ();
- if (other.unknown_p ())
- return unknown_p ();
-
- for (size_t i = 0; i < NUM_DEMAND; i++)
- if (m_demands[i] != other.demand_p ((enum demand_type) i))
- return false;
-
- /* We should consider different INSN demands as different
- expression. Otherwise, we will be doing incorrect vsetvl
- elimination. */
- if (m_insn != other.get_insn ())
- return false;
-
- if (!same_avl_p (other))
- return false;
-
- /* If the full VTYPE is valid, check that it is the same. */
- return same_vtype_p (other);
-}
-
-void
-vector_insn_info::parse_insn (rtx_insn *rinsn)
-{
- *this = vector_insn_info ();
- if (!NONDEBUG_INSN_P (rinsn))
- return;
- if (optimize == 0 && !has_vtype_op (rinsn))
- return;
- gcc_assert (!vsetvl_discard_result_insn_p (rinsn));
- m_state = VALID;
- extract_insn_cached (rinsn);
- rtx avl = ::get_avl (rinsn);
- m_avl = avl_info (avl, nullptr);
- m_sew = ::get_sew (rinsn);
- m_vlmul = ::get_vlmul (rinsn);
- m_ta = tail_agnostic_p (rinsn);
- m_ma = mask_agnostic_p (rinsn);
-}
-
-void
-vector_insn_info::parse_insn (insn_info *insn)
-{
- *this = vector_insn_info ();
-
- /* Return if it is debug insn for the consistency with optimize == 0. */
- if (insn->is_debug_insn ())
- return;
-
- /* We set it as unknown since we don't what will happen in CALL or ASM. */
- if (insn->is_call () || insn->is_asm ())
- {
- set_unknown ();
+public:
+ vsetvl_info ()
+ : m_insn (nullptr), m_bb (nullptr), m_avl (NULL_RTX), m_vl (NULL_RTX),
+ m_avl_def (nullptr), m_sew (0), m_max_sew (0), m_vlmul (LMUL_RESERVED),
+ m_ratio (0), m_ta (false), m_ma (false),
+ m_sew_lmul_demand (sew_lmul_demand_type::sew_lmul),
+ m_policy_demand (policy_demand_type::tail_mask_policy),
+ m_avl_demand (avl_demand_type::avl), m_state (state_type::UNINITIALIZED),
+ m_ignore (false), change_vtype_only (false), m_read_vl_insn (nullptr),
+ use_by_non_rvv_insn (false)
+ {}
+
+ vsetvl_info (insn_info *insn) : vsetvl_info () { parse_insn (insn); }
+
+ vsetvl_info (rtx_insn *insn) : vsetvl_info () { parse_insn (insn); }
+
+ void set_avl (rtx avl) { m_avl = avl; }
+ void set_vl (rtx vl) { m_vl = vl; }
+ void set_avl_def (set_info *avl_def) { m_avl_def = avl_def; }
+ void set_sew (uint8_t sew) { m_sew = sew; }
+ void set_vlmul (vlmul_type vlmul) { m_vlmul = vlmul; }
+ void set_ratio (uint8_t ratio) { m_ratio = ratio; }
+ void set_ta (bool ta) { m_ta = ta; }
+ void set_ma (bool ma) { m_ma = ma; }
+ void set_ignore () { m_ignore = true; }
+ void set_bb (bb_info *bb) { m_bb = bb; }
+ void set_max_sew (uint8_t max_sew) { m_max_sew = max_sew; }
+ void set_change_vtype_only () { change_vtype_only = true; }
+ void set_read_vl_insn (insn_info *insn) { m_read_vl_insn = insn; }
+
+ rtx get_avl () const { return m_avl; }
+ rtx get_vl () const { return m_vl; }
+ set_info *get_avl_def () const { return m_avl_def; }
+ uint8_t get_sew () const { return m_sew; }
+ vlmul_type get_vlmul () const { return m_vlmul; }
+ uint8_t get_ratio () const { return m_ratio; }
+ bool get_ta () const { return m_ta; }
+ bool get_ma () const { return m_ma; }
+ insn_info *get_insn () const { return m_insn; }
+ bool ignore_p () const { return m_ignore; }
+ bb_info *get_bb () const { return m_bb; }
+ uint8_t get_max_sew () const { return m_max_sew; }
+ insn_info *get_read_vl_insn () const { return m_read_vl_insn; }
+ bool use_by_non_rvv_insn_p () const { return use_by_non_rvv_insn; }
+
+ bool has_imm_avl () const { return m_avl && CONST_INT_P (m_avl); }
+ bool has_vlmax_avl () const { return vlmax_avl_p (m_avl); }
+ bool has_reg_avl () const
+ {
+ return m_avl && REG_P (m_avl) && !has_vlmax_avl ();
+ }
+ bool has_non_zero_avl () const
+ {
+ if (has_imm_avl ())
+ return INTVAL (m_avl) > 0;
+ return has_vlmax_avl ();
+ }
+ bool has_reg_vl () const
+ {
+ gcc_assert (!m_vl || REG_P (m_vl));
+ return m_vl && REG_P (m_vl);
+ }
+ bool has_same_ratio (const vsetvl_info &other) const
+ {
+ return get_ratio () == other.get_ratio ();
+ }
+ bool is_in_origin_bb () const { return get_insn ()->bb () == get_bb (); }
+ void update_avl (const vsetvl_info &other)
+ {
+ m_avl = other.get_avl ();
+ m_vl = other.get_vl ();
+ m_avl_def = other.get_avl_def ();
+ }
+
+ bool uninit_p () const { return m_state == state_type::UNINITIALIZED; }
+ bool valid_p () const { return m_state == state_type::VALID; }
+ bool unknown_p () const { return m_state == state_type::UNKNOWN; }
+ bool empty_p () const { return m_state == state_type::EMPTY; }
+ bool change_vtype_only_p () const { return change_vtype_only; }
+
+ void set_valid () { m_state = state_type::VALID; }
+ void set_unknown () { m_state = state_type::UNKNOWN; }
+ void set_empty () { m_state = state_type::EMPTY; }
+
+ void set_sew_lmul_demand (sew_lmul_demand_type demand)
+ {
+ m_sew_lmul_demand = demand;
+ }
+ void set_policy_demand (policy_demand_type demand)
+ {
+ m_policy_demand = demand;
+ }
+ void set_avl_demand (avl_demand_type demand) { m_avl_demand = demand; }
+
+ sew_lmul_demand_type get_sew_lmul_demand () const
+ {
+ return m_sew_lmul_demand;
+ }
+ policy_demand_type get_policy_demand () const { return m_policy_demand; }
+ avl_demand_type get_avl_demand () const { return m_avl_demand; }
+
+ /* Translate the DEMAND_*_P bits in DEMAND_FLAGS into the three demand
+    members.  Each group of bits (SEW/LMUL/ratio, tail/mask policy, AVL)
+    must equal the numeric value of one enumerator of the corresponding
+    demand type; any other combination hits gcc_unreachable.  */
+ void normalize_demand (unsigned demand_flags)
+ {
+   const unsigned sew_lmul_bits
+     = demand_flags
+       & (DEMAND_SEW_P | DEMAND_LMUL_P | DEMAND_RATIO_P | DEMAND_GE_SEW_P);
+   switch (sew_lmul_bits)
+     {
+     case (unsigned) sew_lmul_demand_type::sew_lmul:
+     case (unsigned) sew_lmul_demand_type::ratio_only:
+     case (unsigned) sew_lmul_demand_type::sew_only:
+     case (unsigned) sew_lmul_demand_type::ge_sew:
+     case (unsigned) sew_lmul_demand_type::ratio_and_ge_sew:
+       /* The bits match an enumerator exactly, so the cast selects it.  */
+       m_sew_lmul_demand = (sew_lmul_demand_type) sew_lmul_bits;
+       break;
+     default:
+       gcc_unreachable ();
+     }
+
+   const unsigned policy_bits
+     = demand_flags & (DEMAND_TAIL_POLICY_P | DEMAND_MASK_POLICY_P);
+   switch (policy_bits)
+     {
+     case (unsigned) policy_demand_type::tail_mask_policy:
+     case (unsigned) policy_demand_type::tail_policy_only:
+     case (unsigned) policy_demand_type::mask_policy_only:
+     case (unsigned) policy_demand_type::ignore_policy:
+       m_policy_demand = (policy_demand_type) policy_bits;
+       break;
+     default:
+       gcc_unreachable ();
+     }
+
+   const unsigned avl_bits
+     = demand_flags & (DEMAND_AVL_P | DEMAND_NON_ZERO_AVL_P);
+   switch (avl_bits)
+     {
+     case (unsigned) avl_demand_type::avl:
+     case (unsigned) avl_demand_type::non_zero_avl:
+     case (unsigned) avl_demand_type::ignore_avl:
+       m_avl_demand = (avl_demand_type) avl_bits;
+       break;
+     default:
+       gcc_unreachable ();
+     }
+ }
+
+ void parse_insn (rtx_insn *rinsn)
+ {
+ if (!NONDEBUG_INSN_P (rinsn))
return;
- }
-
- /* If this is something that updates VL/VTYPE that we don't know about, set
- the state to unknown. */
- if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ())
- && (find_access (insn->defs (), VL_REGNUM)
- || find_access (insn->defs (), VTYPE_REGNUM)))
- {
- set_unknown ();
+ if (optimize == 0 && !has_vtype_op (rinsn))
return;
- }
-
- if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ()))
- return;
-
- /* Warning: This function has to work on both the lowered (i.e. post
- emit_local_forward_vsetvls) and pre-lowering forms. The main implication
- of this is that it can't use the value of a SEW, VL, or Policy operand as
- they might be stale after lowering. */
- vl_vtype_info::operator= (get_vl_vtype_info (insn));
- m_insn = insn;
- m_state = VALID;
- if (vector_config_insn_p (insn->rtl ()))
- {
- m_demands[DEMAND_AVL] = true;
- m_demands[DEMAND_RATIO] = true;
+ gcc_assert (!vsetvl_discard_result_insn_p (rinsn));
+ set_valid ();
+ extract_insn_cached (rinsn);
+ m_avl = ::get_avl (rinsn);
+ if (has_vlmax_avl () || vsetvl_insn_p (rinsn))
+ m_vl = ::get_vl (rinsn);
+ m_sew = ::get_sew (rinsn);
+ m_vlmul = ::get_vlmul (rinsn);
+ m_ta = tail_agnostic_p (rinsn);
+ m_ma = mask_agnostic_p (rinsn);
+ }
+
+ void parse_insn (insn_info *insn)
+ {
+ m_insn = insn;
+ m_bb = insn->bb ();
+ /* Return if it is debug insn for the consistency with optimize == 0. */
+ if (insn->is_debug_insn ())
return;
- }
-
- if (has_vl_op (insn->rtl ()))
- m_demands[DEMAND_AVL] = true;
-
- if (get_attr_ratio (insn->rtl ()) != INVALID_ATTRIBUTE)
- m_demands[DEMAND_RATIO] = true;
- else
- {
- /* TODO: By default, if it doesn't demand RATIO, we set it
- demand SEW && LMUL both. Some instructions may demand SEW
- only and ignore LMUL, will fix it later. */
- m_demands[DEMAND_SEW] = true;
- if (!ignore_vlmul_insn_p (insn->rtl ()))
- m_demands[DEMAND_LMUL] = true;
- }
-
- if (get_attr_ta (insn->rtl ()) != INVALID_ATTRIBUTE)
- m_demands[DEMAND_TAIL_POLICY] = true;
- if (get_attr_ma (insn->rtl ()) != INVALID_ATTRIBUTE)
- m_demands[DEMAND_MASK_POLICY] = true;
-
- if (vector_config_insn_p (insn->rtl ()))
- return;
-
- if (scalar_move_insn_p (insn->rtl ()))
- {
- if (m_avl.has_non_zero_avl ())
- m_demands[DEMAND_NONZERO_AVL] = true;
- if (m_ta)
- m_demands[DEMAND_GE_SEW] = true;
- }
- if (!m_avl.has_avl_reg () || vlmax_avl_p (get_avl ()) || !m_avl.get_source ())
- return;
- if (!m_avl.get_source ()->insn ()->is_real ()
- && !m_avl.get_source ()->insn ()->is_phi ())
- return;
+ /* We set it as unknown since we don't know what will happen in CALL or ASM. */
+ if (insn->is_call () || insn->is_asm ())
+ {
+ set_unknown ();
+ return;
+ }
+
+ /* If this is something that updates VL/VTYPE that we don't know about, set
+ the state to unknown. */
+ if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ())
+ && (find_access (insn->defs (), VL_REGNUM)
+ || find_access (insn->defs (), VTYPE_REGNUM)))
+ {
+ set_unknown ();
+ return;
+ }
+
+ if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ()))
+ /* uninitialized */
+ return;
- insn_info *def_insn = extract_single_source (m_avl.get_source ());
- if (!def_insn || !vsetvl_insn_p (def_insn->rtl ()))
- return;
+ set_valid ();
+
+ m_avl = ::get_avl (insn->rtl ());
+ if (m_avl)
+ {
+ if (vsetvl_insn_p (insn->rtl ()) || has_vlmax_avl ())
+ m_vl = ::get_vl (insn->rtl ());
+
+ if (has_reg_avl ())
+ m_avl_def = find_access (insn->uses (), REGNO (m_avl))->def ();
+ }
+
+ m_sew = ::get_sew (insn->rtl ());
+ m_vlmul = ::get_vlmul (insn->rtl ());
+ m_ratio = get_attr_ratio (insn->rtl ());
+ /* When get_attr_ratio is invalid, this kind of instruction
+ doesn't care about the ratio. However, we still need this value
+ in demand info backward analysis. */
+ if (m_ratio == INVALID_ATTRIBUTE)
+ m_ratio = calculate_ratio (m_sew, m_vlmul);
+ m_ta = tail_agnostic_p (insn->rtl ());
+ m_ma = mask_agnostic_p (insn->rtl ());
+
+ /* If merge operand is undef value, we prefer agnostic. */
+ int merge_op_idx = get_attr_merge_op_idx (insn->rtl ());
+ if (merge_op_idx != INVALID_ATTRIBUTE
+ && satisfies_constraint_vu (recog_data.operand[merge_op_idx]))
+ {
+ m_ta = true;
+ m_ma = true;
+ }
+
+ /* Determine the demand info of the RVV insn. */
+ m_max_sew = get_max_int_sew ();
+ unsigned demand_flags = 0;
+ if (vector_config_insn_p (insn->rtl ()))
+ {
+ demand_flags |= demand_flags::DEMAND_AVL_P;
+ demand_flags |= demand_flags::DEMAND_RATIO_P;
+ }
+ else
+ {
+ if (has_vl_op (insn->rtl ()))
+ {
+ if (scalar_move_insn_p (insn->rtl ()))
+ {
+ /* If the avl for vmv.s.x comes from the vsetvl instruction, we
+ don't know if the avl is non-zero, so it is set to
+ DEMAND_AVL_P for now. it may be corrected to
+ DEMAND_NON_ZERO_AVL_P later when more information is
+ available.
+ */
+ if (has_non_zero_avl ())
+ demand_flags |= demand_flags::DEMAND_NON_ZERO_AVL_P;
+ else
+ demand_flags |= demand_flags::DEMAND_AVL_P;
+ }
+ else
+ demand_flags |= demand_flags::DEMAND_AVL_P;
+ }
- vector_insn_info new_info;
- new_info.parse_insn (def_insn);
- if (!same_vlmax_p (new_info) && !scalar_move_insn_p (insn->rtl ()))
- return;
+ if (get_attr_ratio (insn->rtl ()) != INVALID_ATTRIBUTE)
+ demand_flags |= demand_flags::DEMAND_RATIO_P;
+ else
+ {
+ if (scalar_move_insn_p (insn->rtl ()) && m_ta)
+ {
+ demand_flags |= demand_flags::DEMAND_GE_SEW_P;
+ m_max_sew = get_attr_type (insn->rtl ()) == TYPE_VFMOVFV
+ ? get_max_float_sew ()
+ : get_max_int_sew ();
+ }
+ else
+ demand_flags |= demand_flags::DEMAND_SEW_P;
+
+ if (!ignore_vlmul_insn_p (insn->rtl ()))
+ demand_flags |= demand_flags::DEMAND_LMUL_P;
+ }
- if (new_info.has_avl ())
- {
- if (new_info.has_avl_imm ())
- set_avl_info (avl_info (new_info.get_avl (), nullptr));
- else
- {
- if (vlmax_avl_p (new_info.get_avl ()))
- set_avl_info (avl_info (new_info.get_avl (), get_avl_source ()));
- else
- {
- /* Conservatively propagate non-VLMAX AVL of user vsetvl:
- 1. The user vsetvl should be same block with the rvv insn.
- 2. The user vsetvl is the only def insn of rvv insn.
- 3. The AVL is not modified between def-use chain.
- 4. The VL is only used by insn within EBB.
- */
- bool modified_p = false;
- for (insn_info *i = def_insn->next_nondebug_insn ();
- real_insn_and_same_bb_p (i, get_insn ()->bb ());
- i = i->next_nondebug_insn ())
- {
- /* Consider this following sequence:
-
- insn 1: vsetvli a5,a3,e8,mf4,ta,mu
- insn 2: vsetvli zero,a5,e32,m1,ta,ma
- ...
- vle32.v v1,0(a1)
- vsetvli a2,zero,e32,m1,ta,ma
- vadd.vv v1,v1,v1
- vsetvli zero,a5,e32,m1,ta,ma
- vse32.v v1,0(a0)
- ...
- insn 3: sub a3,a3,a5
- ...
-
- We can local AVL propagate "a3" from insn 1 to insn 2
- if no insns between insn 1 and insn 2 modify "a3 even
- though insn 3 modifies "a3".
- Otherwise, we can't perform local AVL propagation.
-
- Early break if we reach the insn 2. */
- if (!before_p (i, insn))
- break;
- if (find_access (i->defs (), REGNO (new_info.get_avl ())))
- {
- modified_p = true;
- break;
- }
- }
+ if (!m_ta)
+ demand_flags |= demand_flags::DEMAND_TAIL_POLICY_P;
+ if (!m_ma)
+ demand_flags |= demand_flags::DEMAND_MASK_POLICY_P;
+ }
+
+ normalize_demand (demand_flags);
+
+ /* Optimize AVL from the vsetvl instruction. */
+ insn_info *def_insn = extract_single_source (get_avl_def ());
+ if (def_insn && vsetvl_insn_p (def_insn->rtl ()))
+ {
+ vsetvl_info def_info = vsetvl_info (def_insn);
+ if ((scalar_move_insn_p (insn->rtl ())
+ || def_info.get_ratio () == get_ratio ())
+ && (def_info.has_vlmax_avl () || def_info.has_imm_avl ()))
+ {
+ update_avl (def_info);
+ if (scalar_move_insn_p (insn->rtl ()) && has_non_zero_avl ())
+ m_avl_demand = avl_demand_type::non_zero_avl;
+ }
+ }
+
+ /* Determine if dest operand(vl) has been used by non-RVV instructions. */
+ if (has_reg_vl ())
+ {
+ const hash_set<use_info *> vl_uses
+ = get_all_real_uses (get_insn (), REGNO (get_vl ()));
+ for (use_info *use : vl_uses)
+ {
+ gcc_assert (use->insn ()->is_real ());
+ rtx_insn *rinsn = use->insn ()->rtl ();
+ if (!has_vl_op (rinsn)
+ || count_regno_occurrences (rinsn, REGNO (get_vl ())) != 1)
+ {
+ use_by_non_rvv_insn = true;
+ break;
+ }
+ rtx avl = ::get_avl (rinsn);
+ if (!avl || REGNO (get_vl ()) != REGNO (avl))
+ {
+ use_by_non_rvv_insn = true;
+ break;
+ }
+ }
+ }
- bool has_live_out_use = false;
- for (use_info *use : m_avl.get_source ()->all_uses ())
- {
- if (use->is_live_out_use ())
- {
- has_live_out_use = true;
- break;
- }
- }
- if (!modified_p && !has_live_out_use
- && def_insn == m_avl.get_source ()->insn ()
- && m_insn->bb () == def_insn->bb ())
- set_avl_info (new_info.get_avl_info ());
- }
- }
- }
+ /* Collect the read vl insn for the fault-only-first rvv loads. */
+ if (fault_first_load_p (insn->rtl ()))
+ {
+ for (insn_info *i = insn->next_nondebug_insn ();
+ i->bb () == insn->bb (); i = i->next_nondebug_insn ())
+ {
+ if (find_access (i->defs (), VL_REGNUM))
+ break;
+ if (i->rtl () && read_vl_insn_p (i->rtl ()))
+ {
+ m_read_vl_insn = i;
+ break;
+ }
+ }
+ }
+ }
+
+ /* Compare two infos.  Neither side may be uninitialized.  If this info
+    is EMPTY (resp. UNKNOWN) the result is whether OTHER is in that same
+    state.  Otherwise the insn, block, AVL, VL, AVL definition, SEW, LMUL,
+    tail/mask policy and the three demand kinds must all match.  */
+ bool operator== (const vsetvl_info &other) const
+ {
+   gcc_assert (!uninit_p () && !other.uninit_p ()
+               && "Uninitialization should not happen");
+
+   if (empty_p ())
+     return other.empty_p ();
+   if (unknown_p ())
+     return other.unknown_p ();
+
+   /* Where the info comes from.  */
+   if (get_insn () != other.get_insn () || get_bb () != other.get_bb ())
+     return false;
+   /* AVL and VL operands.  */
+   if (get_avl () != other.get_avl () || get_vl () != other.get_vl ()
+       || get_avl_def () != other.get_avl_def ())
+     return false;
+   /* VTYPE fields.  */
+   if (get_sew () != other.get_sew () || get_vlmul () != other.get_vlmul ()
+       || get_ta () != other.get_ta () || get_ma () != other.get_ma ())
+     return false;
+   /* Demands.  */
+   if (get_avl_demand () != other.get_avl_demand ()
+       || get_sew_lmul_demand () != other.get_sew_lmul_demand ())
+     return false;
+   return get_policy_demand () == other.get_policy_demand ();
+ }
+
+ void dump (FILE *file, const char *indent = "") const
+ {
+ if (uninit_p ())
+ {
+ fprintf (file, "UNINITIALIZED.\n");
+ return;
+ }
+ else if (unknown_p ())
+ {
+ fprintf (file, "UNKNOWN.\n");
+ return;
+ }
+ else if (empty_p ())
+ {
+ fprintf (file, "EMPTY.\n");
+ return;
+ }
+ else if (valid_p ())
+ fprintf (file, "VALID (insn %u, bb %u)%s\n", get_insn ()->uid (),
+ get_bb ()->index (), ignore_p () ? " (ignore)" : "");
+ else
+ gcc_unreachable ();
- if (scalar_move_insn_p (insn->rtl ()) && m_avl.has_non_zero_avl ())
- m_demands[DEMAND_NONZERO_AVL] = true;
-}
+ fprintf (file, "%sDemand fields:", indent);
+ if (m_sew_lmul_demand == sew_lmul_demand_type::sew_lmul)
+ fprintf (file, " demand_sew_lmul");
+ else if (m_sew_lmul_demand == sew_lmul_demand_type::ratio_only)
+ fprintf (file, " demand_ratio_only");
+ else if (m_sew_lmul_demand == sew_lmul_demand_type::sew_only)
+ fprintf (file, " demand_sew_only");
+ else if (m_sew_lmul_demand == sew_lmul_demand_type::ge_sew)
+ fprintf (file, " demand_ge_sew");
+ else if (m_sew_lmul_demand == sew_lmul_demand_type::ratio_and_ge_sew)
+ fprintf (file, " demand_ratio_and_ge_sew");
+
+ if (m_policy_demand == policy_demand_type::tail_mask_policy)
+ fprintf (file, " demand_tail_mask_policy");
+ else if (m_policy_demand == policy_demand_type::tail_policy_only)
+ fprintf (file, " demand_tail_policy_only");
+ else if (m_policy_demand == policy_demand_type::mask_policy_only)
+ fprintf (file, " demand_mask_policy_only");
+
+ if (m_avl_demand == avl_demand_type::avl)
+ fprintf (file, " demand_avl");
+ else if (m_avl_demand == avl_demand_type::non_zero_avl)
+ fprintf (file, " demand_non_zero_avl");
+ fprintf (file, "\n");
+
+ fprintf (file, "%sSEW=%d, ", indent, get_sew ());
+ fprintf (file, "VLMUL=%s, ", vlmul_to_str (get_vlmul ()));
+ fprintf (file, "RATIO=%d, ", get_ratio ());
+ fprintf (file, "MAX_SEW=%d\n", get_max_sew ());
+
+ fprintf (file, "%sTAIL_POLICY=%s, ", indent, policy_to_str (get_ta ()));
+ fprintf (file, "MASK_POLICY=%s\n", policy_to_str (get_ma ()));
+
+ fprintf (file, "%sAVL=", indent);
+ print_rtl_single (file, get_avl ());
+ fprintf (file, "%sVL=", indent);
+ print_rtl_single (file, get_vl ());
+ if (change_vtype_only_p ())
+ fprintf (file, "%schange vtype only\n", indent);
+ if (get_read_vl_insn ())
+ fprintf (file, "%sread_vl_insn: insn %u\n", indent,
+ get_read_vl_insn ()->uid ());
+ if (use_by_non_rvv_insn_p ())
+ fprintf (file, "%suse_by_non_rvv_insn=true\n", indent);
+ }
+};
bool
-vector_insn_info::compatible_p (const vector_insn_info &other) const
+same_equiv_note_p (const vsetvl_info &prev, const vsetvl_info &next)
{
- gcc_assert (valid_or_dirty_p () && other.valid_or_dirty_p ()
- && "Can't compare invalid demanded infos");
-
- for (const auto &cond : incompatible_conds)
- if (cond.dual_incompatible_p (*this, other))
- return false;
- return true;
+ set_info *set1 = prev.get_avl_def ();
+ set_info *set2 = next.get_avl_def ();
+ insn_info *insn1 = extract_single_source (set1);
+ insn_info *insn2 = extract_single_source (set2);
+ if (!insn1 || !insn2)
+ return false;
+ return source_equal_p (insn1, insn2);
}
-bool
-vector_insn_info::skip_avl_compatible_p (const vector_insn_info &other) const
+class demand_system
{
- gcc_assert (valid_or_dirty_p () && other.valid_or_dirty_p ()
- && "Can't compare invalid demanded infos");
- unsigned array_size = sizeof (incompatible_conds) / sizeof (demands_cond);
- /* Bypass AVL incompatible cases. */
- for (unsigned i = 1; i < array_size; i++)
- if (incompatible_conds[i].dual_incompatible_p (*this, other))
- return false;
- return true;
-}
+private:
+ sbitmap *m_avl_def_in;
+ sbitmap *m_avl_def_out;
-bool
-vector_insn_info::compatible_avl_p (const vl_vtype_info &other) const
-{
- gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info");
- gcc_assert (!unknown_p () && "Can't compare AVL in unknown state");
- if (!demand_p (DEMAND_AVL))
- return true;
- if (demand_p (DEMAND_NONZERO_AVL) && other.has_non_zero_avl ())
- return true;
- return get_avl_info () == other.get_avl_info ();
-}
+ /* Predicates. */
-bool
-vector_insn_info::compatible_avl_p (const avl_info &other) const
-{
- gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info");
- gcc_assert (!unknown_p () && "Can't compare AVL in unknown state");
- gcc_assert (demand_p (DEMAND_AVL) && "Can't compare AVL undemand state");
- if (!demand_p (DEMAND_AVL))
- return true;
- if (demand_p (DEMAND_NONZERO_AVL) && other.has_non_zero_avl ())
+ inline bool always_true (const vsetvl_info &prev ATTRIBUTE_UNUSED,
+ const vsetvl_info &next ATTRIBUTE_UNUSED)
+ {
return true;
- return get_avl_info () == other;
-}
-
-bool
-vector_insn_info::compatible_vtype_p (const vl_vtype_info &other) const
-{
- gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info");
- gcc_assert (!unknown_p () && "Can't compare VTYPE in unknown state");
- if (demand_p (DEMAND_SEW))
- {
- if (!demand_p (DEMAND_GE_SEW) && m_sew != other.get_sew ())
- return false;
- if (demand_p (DEMAND_GE_SEW) && m_sew > other.get_sew ())
- return false;
- }
- if (demand_p (DEMAND_LMUL) && m_vlmul != other.get_vlmul ())
- return false;
- if (demand_p (DEMAND_RATIO) && m_ratio != other.get_ratio ())
- return false;
- if (demand_p (DEMAND_TAIL_POLICY) && m_ta != other.get_ta ())
+ }
+ inline bool always_false (const vsetvl_info &prev ATTRIBUTE_UNUSED,
+ const vsetvl_info &next ATTRIBUTE_UNUSED)
+ {
return false;
- if (demand_p (DEMAND_MASK_POLICY) && m_ma != other.get_ma ())
- return false;
- return true;
-}
-
-/* Determine whether the vector instructions requirements represented by
- Require are compatible with the previous vsetvli instruction represented
- by this. INSN is the instruction whose requirements we're considering. */
-bool
-vector_insn_info::compatible_p (const vl_vtype_info &curr_info) const
-{
- gcc_assert (!uninit_p () && "Can't handle uninitialized info");
- if (empty_p ())
+ }
+
+ /* Predicates for SEW and LMUL. */
+
+ /* Return true if PREV and NEXT have the same LMUL.  */
+ inline bool eq_lmul_p (const vsetvl_info &prev, const vsetvl_info &next)
+ {
+   return next.get_vlmul () == prev.get_vlmul ();
+ }
+ /* Return true if PREV and NEXT have the same SEW.  */
+ inline bool eq_sew_p (const vsetvl_info &prev, const vsetvl_info &next)
+ {
+   return next.get_sew () == prev.get_sew ();
+ }
+ /* Return true if PREV and NEXT agree on both LMUL and SEW.  */
+ inline bool eq_sew_lmul_p (const vsetvl_info &prev, const vsetvl_info &next)
+ {
+   if (!eq_lmul_p (prev, next))
+     return false;
+   return eq_sew_p (prev, next);
+ }
+ /* Return true if PREV's SEW equals NEXT's, or if NEXT is tail agnostic
+    and PREV's SEW is the larger one.  */
+ inline bool ge_next_sew_p (const vsetvl_info &prev, const vsetvl_info &next)
+ {
+   if (prev.get_sew () == next.get_sew ())
+     return true;
+   return next.get_ta () && prev.get_sew () > next.get_sew ();
+ }
+ /* Return true if PREV's SEW equals NEXT's, or if PREV is tail agnostic
+    and NEXT's SEW is the larger one.  */
+ inline bool ge_prev_sew_p (const vsetvl_info &prev, const vsetvl_info &next)
+ {
+   if (prev.get_sew () == next.get_sew ())
+     return true;
+   return prev.get_ta () && prev.get_sew () < next.get_sew ();
+ }
+ /* Return true if PREV's SEW does not exceed NEXT's maximum SEW.  */
+ inline bool le_next_max_sew_p (const vsetvl_info &prev,
+                                const vsetvl_info &next)
+ {
+   return next.get_max_sew () >= prev.get_sew ();
+ }
+ /* Return true if NEXT's SEW does not exceed PREV's maximum SEW.  */
+ inline bool le_prev_max_sew_p (const vsetvl_info &prev,
+                                const vsetvl_info &next)
+ {
+   return prev.get_max_sew () >= next.get_sew ();
+ }
+ /* Return true if each side's SEW stays within the other side's maximum
+    SEW.  */
+ inline bool max_sew_overlap_p (const vsetvl_info &prev,
+                                const vsetvl_info &next)
+ {
+   return prev.get_sew () <= next.get_max_sew ()
+          && next.get_sew () <= prev.get_max_sew ();
+ }
+ /* Return true if PREV and NEXT have the same ratio.  */
+ inline bool eq_ratio_p (const vsetvl_info &prev, const vsetvl_info &next)
+ {
+   return prev.has_same_ratio (next);
+ }
+ /* Return true if PREV's ratio is at least NEXT's SEW divided by 8.  */
+ inline bool has_prev_ratio_p (const vsetvl_info &prev,
+                               const vsetvl_info &next)
+ {
+   const auto min_ratio = next.get_sew () / 8;
+   return prev.get_ratio () >= min_ratio;
+ }
+ /* Return true if NEXT's ratio is at least PREV's SEW divided by 8.  */
+ inline bool has_next_ratio_p (const vsetvl_info &prev,
+                               const vsetvl_info &next)
+ {
+   const auto min_ratio = prev.get_sew () / 8;
+   return next.get_ratio () >= min_ratio;
+ }
+
+ /* The predicates below are conjunctions of the basic SEW/ratio
+    predicates; each name lists the conditions that must all hold.  */
+
+ inline bool ge_next_sew_and_eq_ratio_p (const vsetvl_info &prev,
+                                         const vsetvl_info &next)
+ {
+   if (!ge_next_sew_p (prev, next))
+     return false;
+   return eq_ratio_p (prev, next);
+ }
+ inline bool ge_next_sew_and_le_next_max_sew_p (const vsetvl_info &prev,
+                                                const vsetvl_info &next)
+ {
+   if (!ge_next_sew_p (prev, next))
+     return false;
+   return le_next_max_sew_p (prev, next);
+ }
+ inline bool
+ ge_next_sew_and_le_next_max_sew_and_has_next_ratio_p (const vsetvl_info &prev,
+                                                       const vsetvl_info &next)
+ {
+   if (!ge_next_sew_p (prev, next) || !le_next_max_sew_p (prev, next))
+     return false;
+   return has_next_ratio_p (prev, next);
+ }
+ inline bool ge_prev_sew_and_le_prev_max_sew_p (const vsetvl_info &prev,
+                                                const vsetvl_info &next)
+ {
+   if (!ge_prev_sew_p (prev, next))
+     return false;
+   return le_prev_max_sew_p (prev, next);
+ }
+ inline bool max_sew_overlap_and_has_next_ratio_p (const vsetvl_info &prev,
+                                                   const vsetvl_info &next)
+ {
+   if (!has_next_ratio_p (prev, next))
+     return false;
+   return max_sew_overlap_p (prev, next);
+ }
+ inline bool
+ ge_prev_sew_and_le_prev_max_sew_and_eq_ratio_p (const vsetvl_info &prev,
+                                                 const vsetvl_info &next)
+ {
+   if (!ge_prev_sew_p (prev, next) || !eq_ratio_p (prev, next))
+     return false;
+   return le_prev_max_sew_p (prev, next);
+ }
+ inline bool max_sew_overlap_and_has_prev_ratio_p (const vsetvl_info &prev,
+                                                   const vsetvl_info &next)
+ {
+   if (!has_prev_ratio_p (prev, next))
+     return false;
+   return max_sew_overlap_p (prev, next);
+ }
+ inline bool
+ ge_prev_sew_and_le_prev_max_sew_and_has_prev_ratio_p (const vsetvl_info &prev,
+                                                       const vsetvl_info &next)
+ {
+   if (!ge_prev_sew_p (prev, next) || !has_prev_ratio_p (prev, next))
+     return false;
+   return le_prev_max_sew_p (prev, next);
+ }
+ inline bool max_sew_overlap_and_eq_ratio_p (const vsetvl_info &prev,
+                                             const vsetvl_info &next)
+ {
+   if (!eq_ratio_p (prev, next))
+     return false;
+   return max_sew_overlap_p (prev, next);
+ }
+
+ /* Predicates for tail and mask policy. */
+
+ /* Return true if PREV and NEXT have the same tail policy.  */
+ inline bool eq_tail_policy_p (const vsetvl_info &prev,
+                               const vsetvl_info &next)
+ {
+   return next.get_ta () == prev.get_ta ();
+ }
+ /* Return true if PREV and NEXT have the same mask policy.  */
+ inline bool eq_mask_policy_p (const vsetvl_info &prev,
+                               const vsetvl_info &next)
+ {
+   return next.get_ma () == prev.get_ma ();
+ }
+ /* Return true if PREV and NEXT agree on both tail and mask policy.  */
+ inline bool eq_tail_mask_policy_p (const vsetvl_info &prev,
+                                    const vsetvl_info &next)
+ {
+   if (!eq_tail_policy_p (prev, next))
+     return false;
+   return eq_mask_policy_p (prev, next);
+ }
+
+ /* Return true if either side is tail agnostic, or both sides have the
+    same tail policy.  */
+ inline bool comp_tail_policy_p (const vsetvl_info &prev,
+                                 const vsetvl_info &next)
+ {
+   if (prev.get_ta () || next.get_ta ())
+     return true;
+   return eq_tail_policy_p (prev, next);
+ }
+
+ /* Return true if either side is mask agnostic, or both sides have the
+    same mask policy.  */
+ inline bool comp_mask_policy_p (const vsetvl_info &prev,
+                                 const vsetvl_info &next)
+ {
+   if (prev.get_ma () || next.get_ma ())
+     return true;
+   return eq_mask_policy_p (prev, next);
+ }
+
+ /* Return true if both comp_tail_policy_p and comp_mask_policy_p hold.  */
+ inline bool comp_tail_mask_policy_p (const vsetvl_info &prev,
+                                      const vsetvl_info &next)
+ {
+   if (!comp_tail_policy_p (prev, next))
+     return false;
+   return comp_mask_policy_p (prev, next);
+ }
+
+ /* Predicates for AVL. */
+
+ /* Return true if INFO has a register VL and insn I either uses or
+    defines that register.  */
+ inline bool def_or_use_vl_p (insn_info *i, const vsetvl_info &info)
+ {
+   if (!info.has_reg_vl ())
+     return false;
+   if (find_access (i->uses (), REGNO (info.get_vl ())))
+     return true;
+   return find_access (i->defs (), REGNO (info.get_vl ())) != nullptr;
+ }
+ /* Return true if INFO has a register AVL and insn I defines that
+    register.  */
+ inline bool def_avl_p (insn_info *i, const vsetvl_info &info)
+ {
+   if (!info.has_reg_avl ())
+     return false;
+   return find_access (i->defs (), REGNO (info.get_avl ())) != nullptr;
+ }
+
+ /* Return true if some non-debug insn strictly between PREV_INSN and
+    CURR_INSN defines register REGNO.  PREV_INSN must come before
+    CURR_INSN.  */
+ inline bool def_reg_between (insn_info *prev_insn, insn_info *curr_insn,
+                              unsigned regno)
+ {
+   gcc_assert (prev_insn->compare_with (curr_insn) < 0);
+   /* Within a single BB, from top to bottom, without crossing edges.  */
+   insn_info *walk = curr_insn->prev_nondebug_insn ();
+   while (walk != prev_insn)
+     {
+       /* A definition of REGNO was found between the two insns.  */
+       if (find_access (walk->defs (), regno))
+         return true;
+       walk = walk->prev_nondebug_insn ();
+     }
 return false;
+ }
- /* Nothing is compatible with Unknown. */
- if (unknown_p ())
- return false;
+ inline bool same_reg_avl_p (const vsetvl_info &prev, const vsetvl_info &next)
+ {
+ if (!prev.has_reg_avl () || !next.has_reg_avl ())
+ return false;
- /* If the instruction doesn't need an AVLReg and the SEW matches, consider
- it compatible. */
- if (!demand_p (DEMAND_AVL))
- if (m_sew == curr_info.get_sew ())
+ if (same_equiv_note_p (prev, next))
return true;
- return compatible_avl_p (curr_info) && compatible_vtype_p (curr_info);
-}
-
-bool
-vector_insn_info::available_p (const vector_insn_info &other) const
-{
- return *this >= other;
-}
-
-void
-vector_insn_info::fuse_avl (const vector_insn_info &info1,
- const vector_insn_info &info2)
-{
- set_insn (info1.get_insn ());
- if (info1.demand_p (DEMAND_AVL))
- {
- if (info1.demand_p (DEMAND_NONZERO_AVL))
- {
- if (info2.demand_p (DEMAND_AVL)
- && !info2.demand_p (DEMAND_NONZERO_AVL))
- {
- set_avl_info (info2.get_avl_info ());
- set_demand (DEMAND_AVL, true);
- set_demand (DEMAND_NONZERO_AVL, false);
- return;
- }
- }
- set_avl_info (info1.get_avl_info ());
- set_demand (DEMAND_NONZERO_AVL, info1.demand_p (DEMAND_NONZERO_AVL));
- }
- else
- {
- set_avl_info (info2.get_avl_info ());
- set_demand (DEMAND_NONZERO_AVL, info2.demand_p (DEMAND_NONZERO_AVL));
- }
- set_demand (DEMAND_AVL,
- info1.demand_p (DEMAND_AVL) || info2.demand_p (DEMAND_AVL));
-}
-
-void
-vector_insn_info::fuse_sew_lmul (const vector_insn_info &info1,
- const vector_insn_info &info2)
-{
- /* We need to fuse sew && lmul according to demand info:
-
- 1. GE_SEW.
- 2. SEW.
- 3. LMUL.
- 4. RATIO. */
- if (same_sew_lmul_demand_p (info1.get_demands (), info2.get_demands ()))
- {
- set_demand (DEMAND_SEW, info2.demand_p (DEMAND_SEW));
- set_demand (DEMAND_LMUL, info2.demand_p (DEMAND_LMUL));
- set_demand (DEMAND_RATIO, info2.demand_p (DEMAND_RATIO));
- set_demand (DEMAND_GE_SEW, info2.demand_p (DEMAND_GE_SEW));
- set_sew (info2.get_sew ());
- set_vlmul (info2.get_vlmul ());
- set_ratio (info2.get_ratio ());
- return;
- }
- for (const auto &rule : fuse_rules)
- {
- if (rule.pair.match_cond_p (info1.get_demands (), info2.get_demands ()))
- {
- set_demand (DEMAND_SEW, rule.demand_sew_p);
- set_demand (DEMAND_LMUL, rule.demand_lmul_p);
- set_demand (DEMAND_RATIO, rule.demand_ratio_p);
- set_demand (DEMAND_GE_SEW, rule.demand_ge_sew_p);
- set_sew (rule.new_sew (info1, info2));
- set_vlmul (rule.new_vlmul (info1, info2));
- set_ratio (rule.new_ratio (info1, info2));
- return;
- }
- if (rule.pair.match_cond_p (info2.get_demands (), info1.get_demands ()))
- {
- set_demand (DEMAND_SEW, rule.demand_sew_p);
- set_demand (DEMAND_LMUL, rule.demand_lmul_p);
- set_demand (DEMAND_RATIO, rule.demand_ratio_p);
- set_demand (DEMAND_GE_SEW, rule.demand_ge_sew_p);
- set_sew (rule.new_sew (info2, info1));
- set_vlmul (rule.new_vlmul (info2, info1));
- set_ratio (rule.new_ratio (info2, info1));
- return;
- }
- }
- gcc_unreachable ();
-}
-
-void
-vector_insn_info::fuse_tail_policy (const vector_insn_info &info1,
- const vector_insn_info &info2)
-{
- if (info1.demand_p (DEMAND_TAIL_POLICY))
- {
- set_ta (info1.get_ta ());
- demand (DEMAND_TAIL_POLICY);
- }
- else if (info2.demand_p (DEMAND_TAIL_POLICY))
- {
- set_ta (info2.get_ta ());
- demand (DEMAND_TAIL_POLICY);
- }
- else
- set_ta (get_default_ta ());
-}
-
-void
-vector_insn_info::fuse_mask_policy (const vector_insn_info &info1,
- const vector_insn_info &info2)
-{
- if (info1.demand_p (DEMAND_MASK_POLICY))
- {
- set_ma (info1.get_ma ());
- demand (DEMAND_MASK_POLICY);
- }
- else if (info2.demand_p (DEMAND_MASK_POLICY))
- {
- set_ma (info2.get_ma ());
- demand (DEMAND_MASK_POLICY);
- }
- else
- set_ma (get_default_ma ());
-}
-
-vector_insn_info
-vector_insn_info::local_merge (const vector_insn_info &merge_info) const
-{
- if (!vsetvl_insn_p (get_insn ()->rtl ()) && *this != merge_info)
- gcc_assert (this->compatible_p (merge_info)
- && "Can't merge incompatible demanded infos");
-
- vector_insn_info new_info;
- new_info.set_valid ();
- /* For local backward data flow, we always update INSN && AVL as the
- latest INSN and AVL so that we can keep track status of each INSN. */
- new_info.fuse_avl (merge_info, *this);
- new_info.fuse_sew_lmul (*this, merge_info);
- new_info.fuse_tail_policy (*this, merge_info);
- new_info.fuse_mask_policy (*this, merge_info);
- return new_info;
-}
+ if (REGNO (prev.get_avl ()) != REGNO (next.get_avl ()))
+ return false;
-vector_insn_info
-vector_insn_info::global_merge (const vector_insn_info &merge_info,
- unsigned int bb_index) const
-{
- if (!vsetvl_insn_p (get_insn ()->rtl ()) && *this != merge_info)
- gcc_assert (this->compatible_p (merge_info)
- && "Can't merge incompatible demanded infos");
-
- vector_insn_info new_info;
- new_info.set_valid ();
-
- /* For global data flow, we should keep original INSN and AVL if they
- valid since we should keep the life information of each block.
-
- For example:
- bb 0 -> bb 1.
- We should keep INSN && AVL of bb 1 since we will eventually emit
- vsetvl instruction according to INSN and AVL of bb 1. */
- new_info.fuse_avl (*this, merge_info);
- /* Recompute the AVL source whose block index is equal to BB_INDEX. */
- if (new_info.get_avl_source ()
- && new_info.get_avl_source ()->insn ()->is_phi ()
- && new_info.get_avl_source ()->bb ()->index () != bb_index)
- {
- hash_set<set_info *> sets
- = get_all_sets (new_info.get_avl_source (), true, true, true);
- new_info.set_avl_source (nullptr);
- bool can_find_set_p = false;
- set_info *first_set = nullptr;
- for (set_info *set : sets)
- {
- if (!first_set)
- first_set = set;
- if (set->bb ()->index () == bb_index)
- {
- gcc_assert (!can_find_set_p);
- new_info.set_avl_source (set);
- can_find_set_p = true;
- }
- }
- if (!can_find_set_p && sets.elements () == 1
- && first_set->insn ()->is_real ())
- new_info.set_avl_source (first_set);
- }
+ insn_info *prev_insn = prev.get_insn ();
+ if (prev.get_bb () != prev_insn->bb ())
+ prev_insn = prev.get_bb ()->end_insn ();
- /* Make sure VLMAX AVL always has a set_info the get VL. */
- if (vlmax_avl_p (new_info.get_avl ()))
- {
- if (this->get_avl_source ())
- new_info.set_avl_source (this->get_avl_source ());
- else
- {
- gcc_assert (merge_info.get_avl_source ());
- new_info.set_avl_source (merge_info.get_avl_source ());
- }
- }
+ insn_info *next_insn = next.get_insn ();
+ if (next.get_bb () != next_insn->bb ())
+ next_insn = next.get_bb ()->end_insn ();
- new_info.fuse_sew_lmul (*this, merge_info);
- new_info.fuse_tail_policy (*this, merge_info);
- new_info.fuse_mask_policy (*this, merge_info);
- return new_info;
-}
+ /* Actually, NEXT's VL may be modified, as long as that VL is not used. */
+ return safe_move_avl_vl_p (prev_insn, next_insn, next, false);
+ }
-/* Wrapper helps to return the AVL or VL operand for the
- vector_insn_info. Return AVL if the AVL is not VLMAX.
- Otherwise, return the VL operand. */
-rtx
-vector_insn_info::get_avl_or_vl_reg (void) const
-{
- gcc_assert (has_avl_reg ());
- if (!vlmax_avl_p (get_avl ()))
- return get_avl ();
+ inline bool equal_avl_p (const vsetvl_info &prev, const vsetvl_info &next)
+ {
+ gcc_assert (prev.valid_p () && next.valid_p ());
- rtx_insn *rinsn = get_insn ()->rtl ();
- if (has_vl_op (rinsn) || vsetvl_insn_p (rinsn))
- {
- rtx vl = ::get_vl (rinsn);
- /* For VLMAX, we should make sure we get the
- REG to emit 'vsetvl VL,zero' since the 'VL'
- should be the REG according to RVV ISA. */
- if (REG_P (vl))
- return vl;
- }
+ if (prev.get_ratio () != next.get_ratio ())
+ return false;
- /* We always has avl_source if it is VLMAX AVL. */
- gcc_assert (get_avl_source ());
- return get_avl_reg_rtx ();
-}
+ if (next.has_reg_vl () && next.use_by_non_rvv_insn_p ())
+ return false;
-bool
-vector_insn_info::update_fault_first_load_avl (insn_info *insn)
-{
- // Update AVL to vl-output of the fault first load.
- const insn_info *read_vl = get_forward_read_vl_insn (insn);
- if (read_vl)
- {
- rtx vl = SET_DEST (PATTERN (read_vl->rtl ()));
- def_info *def = find_access (read_vl->defs (), REGNO (vl));
- set_info *set = safe_dyn_cast<set_info *> (def);
- set_avl_info (avl_info (vl, set));
- set_insn (insn);
+ if (vector_config_insn_p (prev.get_insn ()->rtl ()) && next.get_avl_def ()
+ && next.get_avl_def ()->insn () == prev.get_insn ())
return true;
- }
- return false;
-}
-
-static const char *
-vlmul_to_str (vlmul_type vlmul)
-{
- switch (vlmul)
- {
- case LMUL_1:
- return "m1";
- case LMUL_2:
- return "m2";
- case LMUL_4:
- return "m4";
- case LMUL_8:
- return "m8";
- case LMUL_RESERVED:
- return "INVALID LMUL";
- case LMUL_F8:
- return "mf8";
- case LMUL_F4:
- return "mf4";
- case LMUL_F2:
- return "mf2";
-
- default:
- gcc_unreachable ();
- }
-}
-static const char *
-policy_to_str (bool agnostic_p)
-{
- return agnostic_p ? "agnostic" : "undisturbed";
-}
+ if (prev.get_read_vl_insn ())
+ {
+ if (!next.has_reg_avl () || !next.get_avl_def ())
+ return false;
+ insn_info *avl_def_insn = extract_single_source (next.get_avl_def ());
+ return avl_def_insn == prev.get_read_vl_insn ();
+ }
+
+ if (prev == next && prev.has_reg_avl ())
+ {
+ /* The case where a single BB forms a loop by itself. */
+ insn_info *insn = prev.get_insn ();
+ bb_info *bb = insn->bb ();
+ for (insn_info *i = insn; real_insn_and_same_bb_p (i, bb);
+ i = i->next_nondebug_insn ())
+ if (find_access (i->defs (), REGNO (prev.get_avl ())))
+ return false;
+ }
-void
-vector_insn_info::dump (FILE *file) const
-{
- fprintf (file, "[");
- if (uninit_p ())
- fprintf (file, "UNINITIALIZED,");
- else if (valid_p ())
- fprintf (file, "VALID,");
- else if (unknown_p ())
- fprintf (file, "UNKNOWN,");
- else if (empty_p ())
- fprintf (file, "EMPTY,");
- else
- fprintf (file, "DIRTY,");
-
- fprintf (file, "Demand field={%d(VL),", demand_p (DEMAND_AVL));
- fprintf (file, "%d(DEMAND_NONZERO_AVL),", demand_p (DEMAND_NONZERO_AVL));
- fprintf (file, "%d(SEW),", demand_p (DEMAND_SEW));
- fprintf (file, "%d(DEMAND_GE_SEW),", demand_p (DEMAND_GE_SEW));
- fprintf (file, "%d(LMUL),", demand_p (DEMAND_LMUL));
- fprintf (file, "%d(RATIO),", demand_p (DEMAND_RATIO));
- fprintf (file, "%d(TAIL_POLICY),", demand_p (DEMAND_TAIL_POLICY));
- fprintf (file, "%d(MASK_POLICY)}\n", demand_p (DEMAND_MASK_POLICY));
-
- fprintf (file, "AVL=");
- print_rtl_single (file, get_avl ());
- fprintf (file, "SEW=%d,", get_sew ());
- fprintf (file, "VLMUL=%s,", vlmul_to_str (get_vlmul ()));
- fprintf (file, "RATIO=%d,", get_ratio ());
- fprintf (file, "TAIL_POLICY=%s,", policy_to_str (get_ta ()));
- fprintf (file, "MASK_POLICY=%s", policy_to_str (get_ma ()));
- fprintf (file, "]\n");
-
- if (valid_p ())
- {
- if (get_insn ())
- {
- fprintf (file, "The real INSN=");
- print_rtl_single (file, get_insn ()->rtl ());
- }
- }
-}
+ if (prev.has_vlmax_avl () && next.has_vlmax_avl ())
+ return true;
+ else if (prev.has_imm_avl () && next.has_imm_avl ())
+ return INTVAL (prev.get_avl ()) == INTVAL (next.get_avl ());
+ else if (prev.has_reg_vl () && next.has_reg_avl ()
+ && REGNO (prev.get_vl ()) == REGNO (next.get_avl ()))
+ {
+ insn_info *prev_insn = prev.get_insn ();
+ if (prev.get_bb () != prev_insn->bb ())
+ prev_insn = prev.get_bb ()->end_insn ();
+
+ insn_info *next_insn = next.get_insn ();
+ if (next.get_bb () != next_insn->bb ())
+ next_insn = next.get_bb ()->end_insn ();
+
+ return safe_move_avl_vl_p (prev_insn, next_insn, next, false);
+ // if (prev.get_bb () != next.get_bb () || !prev.is_in_origin_bb ())
+ // return false;
+ // insn_info *prev_insn = prev.get_insn ();
+ // insn_info *curr_insn = next.get_bb ()->end_insn ();
+ // if (def_reg_between (prev_insn, curr_insn, REGNO (next.get_avl ())))
+ // return false;
+ // return true;
+ }
+ else if (prev.has_reg_avl () && next.has_reg_avl ())
+ return same_reg_avl_p (prev, next);
-vector_infos_manager::vector_infos_manager ()
-{
- vector_edge_list = nullptr;
- vector_kill = nullptr;
- vector_del = nullptr;
- vector_insert = nullptr;
- vector_antic = nullptr;
- vector_transp = nullptr;
- vector_comp = nullptr;
- vector_avin = nullptr;
- vector_avout = nullptr;
- vector_antin = nullptr;
- vector_antout = nullptr;
- vector_earliest = nullptr;
- vector_insn_infos.safe_grow_cleared (get_max_uid ());
- vector_block_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
- if (!optimize)
- {
- basic_block cfg_bb;
- rtx_insn *rinsn;
- FOR_ALL_BB_FN (cfg_bb, cfun)
- {
- vector_block_infos[cfg_bb->index].local_dem = vector_insn_info ();
- vector_block_infos[cfg_bb->index].reaching_out = vector_insn_info ();
- FOR_BB_INSNS (cfg_bb, rinsn)
- vector_insn_infos[INSN_UID (rinsn)].parse_insn (rinsn);
- }
- }
- else
- {
- for (const bb_info *bb : crtl->ssa->bbs ())
- {
- vector_block_infos[bb->index ()].local_dem = vector_insn_info ();
- vector_block_infos[bb->index ()].reaching_out = vector_insn_info ();
- for (insn_info *insn : bb->real_insns ())
- vector_insn_infos[insn->uid ()].parse_insn (insn);
- vector_block_infos[bb->index ()].probability = profile_probability ();
- }
- }
-}
+ return false;
+ }
+ inline bool equal_avl_or_prev_non_zero_avl_p (const vsetvl_info &prev,
+ const vsetvl_info &next)
+ {
+ return equal_avl_p (prev, next) || prev.has_non_zero_avl ();
+ }
+
+ inline bool can_use_next_avl_p (const vsetvl_info &prev,
+ const vsetvl_info &next)
+ {
+ if (!next.has_reg_avl () && !next.has_reg_vl ())
+ return true;
-void
-vector_infos_manager::create_expr (vector_insn_info &info)
-{
- for (size_t i = 0; i < vector_exprs.length (); i++)
- if (*vector_exprs[i] == info)
+ insn_info *prev_insn = prev.get_insn ();
+ if (prev.get_bb () != prev_insn->bb ())
+ prev_insn = prev.get_bb ()->end_insn ();
+
+ insn_info *next_insn = next.get_insn ();
+ if (next.get_bb () != next_insn->bb ())
+ next_insn = next.get_bb ()->end_insn ();
+
+ return safe_move_avl_vl_p (prev_insn, next_insn, next);
+ }
+
+ inline bool equal_avl_or_next_non_zero_avl_and_can_use_next_avl_p (
+ const vsetvl_info &prev, const vsetvl_info &next)
+ {
+ return equal_avl_p (prev, next)
+ || (next.has_non_zero_avl () && can_use_next_avl_p (prev, next));
+ }
+
+ /* modifiers */
+
+ inline void nop (const vsetvl_info &prev ATTRIBUTE_UNUSED,
+ const vsetvl_info &next ATTRIBUTE_UNUSED)
+ {}
+
+ /* modifiers for sew and lmul */
+
+ inline void use_min_max_sew (vsetvl_info &prev, const vsetvl_info &next)
+ {
+ prev.set_max_sew (MIN (prev.get_max_sew (), next.get_max_sew ()));
+ }
+ inline void use_next_sew (vsetvl_info &prev, const vsetvl_info &next)
+ {
+ prev.set_sew (next.get_sew ());
+ use_min_max_sew (prev, next);
+ }
+ inline void use_max_sew (vsetvl_info &prev, const vsetvl_info &next)
+ {
+ auto max_sew = std::max (prev.get_sew (), next.get_sew ());
+ prev.set_sew (max_sew);
+ use_min_max_sew (prev, next);
+ }
+ inline void use_next_sew_lmul (vsetvl_info &prev, const vsetvl_info &next)
+ {
+ use_next_sew (prev, next);
+ prev.set_vlmul (next.get_vlmul ());
+ prev.set_ratio (next.get_ratio ());
+ }
+ inline void use_next_sew_with_prev_ratio (vsetvl_info &prev,
+ const vsetvl_info &next)
+ {
+ use_next_sew (prev, next);
+ prev.set_vlmul (calculate_vlmul (next.get_sew (), prev.get_ratio ()));
+ }
+ inline void modify_lmul_with_next_ratio (vsetvl_info &prev,
+ const vsetvl_info &next)
+ {
+ prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ()));
+ prev.set_ratio (next.get_ratio ());
+ }
+
+ inline void use_max_sew_and_lmul_with_next_ratio (vsetvl_info &prev,
+ const vsetvl_info &next)
+ {
+ prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ()));
+ use_max_sew (prev, next);
+ prev.set_ratio (next.get_ratio ());
+ }
+
+ inline void use_max_sew_and_lmul_with_prev_ratio (vsetvl_info &prev,
+ const vsetvl_info &next)
+ {
+ auto max_sew = std::max (prev.get_sew (), next.get_sew ());
+ prev.set_vlmul (calculate_vlmul (max_sew, prev.get_ratio ()));
+ prev.set_sew (max_sew);
+ }
+
+ /* modifiers for tail and mask policy */
+
+ inline void use_tail_policy (vsetvl_info &prev, const vsetvl_info &next)
+ {
+ if (!next.get_ta ())
+ prev.set_ta (next.get_ta ());
+ }
+ inline void use_mask_policy (vsetvl_info &prev, const vsetvl_info &next)
+ {
+ if (!next.get_ma ())
+ prev.set_ma (next.get_ma ());
+ }
+ inline void use_tail_mask_policy (vsetvl_info &prev, const vsetvl_info &next)
+ {
+ use_tail_policy (prev, next);
+ use_mask_policy (prev, next);
+ }
+
+ /* modifiers for avl */
+
+ inline void use_next_avl (vsetvl_info &prev, const vsetvl_info &next)
+ {
+ gcc_assert (can_use_next_avl_p (prev, next));
+ prev.update_avl (next);
+ }
+
+ inline void use_next_avl_when_not_equal (vsetvl_info &prev,
+ const vsetvl_info &next)
+ {
+ if (equal_avl_p (prev, next))
return;
- vector_exprs.safe_push (&info);
-}
-
-size_t
-vector_infos_manager::get_expr_id (const vector_insn_info &info) const
-{
- for (size_t i = 0; i < vector_exprs.length (); i++)
- if (*vector_exprs[i] == info)
- return i;
- gcc_unreachable ();
-}
-
-auto_vec<size_t>
-vector_infos_manager::get_all_available_exprs (
- const vector_insn_info &info) const
-{
- auto_vec<size_t> available_list;
- for (size_t i = 0; i < vector_exprs.length (); i++)
- if (info.available_p (*vector_exprs[i]))
- available_list.safe_push (i);
- return available_list;
-}
+ gcc_assert (next.has_non_zero_avl ());
+ use_next_avl (prev, next);
+ }
-bool
-vector_infos_manager::all_same_ratio_p (sbitmap bitdata) const
-{
- if (bitmap_empty_p (bitdata))
- return false;
+public:
+ demand_system () : m_avl_def_in (nullptr), m_avl_def_out (nullptr) {}
+
+ void set_avl_in_out_data (sbitmap *avl_def_in, sbitmap *avl_def_out)
+ {
+ m_avl_def_in = avl_def_in;
+ m_avl_def_out = avl_def_out;
+ }
+
+ /* Can we safely move vsetvl info between prev_insn and next_insn? */
+ bool safe_move_avl_vl_p (insn_info *prev_insn, insn_info *next_insn,
+ const vsetvl_info &info, bool ignore_vl = false)
+ {
+ gcc_assert ((ignore_vl && info.has_reg_avl ())
+ || (info.has_reg_avl () || info.has_reg_vl ()));
+
+ gcc_assert (!prev_insn->is_debug_insn () && !next_insn->is_debug_insn ());
+ if (prev_insn->bb () == next_insn->bb ()
+ && prev_insn->compare_with (next_insn) < 0)
+ {
+ /* Within a single BB, going from top to bottom without crossing an edge. */
+ for (insn_info *i = next_insn->prev_nondebug_insn (); i != prev_insn;
+ i = i->prev_nondebug_insn ())
+ {
+ // no def or use of vl
+ if (!ignore_vl && def_or_use_vl_p (i, info))
+ return false;
- int ratio = -1;
- unsigned int bb_index;
- sbitmap_iterator sbi;
+ // no def of avl
+ if (def_avl_p (i, info))
+ return false;
+ }
+ return true;
+ }
+ else
+ {
+ /* Crossing an edge: 1. between different BBs, 2. a loop within the same BB. */
+ if (!ignore_vl && info.has_reg_vl ())
+ {
+ /* If the live-out set of prev_bb contains vl, it is not safe to
+ * modify the vl of info at prev_insn. */
+ bitmap live_out = df_get_live_out (prev_insn->bb ()->cfg_bb ());
+ if (bitmap_bit_p (live_out, REGNO (info.get_vl ())))
+ return false;
+ }
- EXECUTE_IF_SET_IN_BITMAP (bitdata, 0, bb_index, sbi)
- {
- if (ratio == -1)
- ratio = vector_exprs[bb_index]->get_ratio ();
- else if (vector_exprs[bb_index]->get_ratio () != ratio)
- return false;
- }
- return true;
-}
+ if (info.has_reg_avl () && m_avl_def_in && m_avl_def_out)
+ {
+ bool has_avl_out = false;
+ unsigned regno = REGNO (info.get_avl ());
+ unsigned expr_id;
+ sbitmap_iterator sbi;
+ EXECUTE_IF_SET_IN_BITMAP (m_avl_def_out[prev_insn->bb ()->index ()],
+ 0, expr_id, sbi)
+ {
+ if (get_regno (expr_id, last_basic_block_for_fn (cfun))
+ != regno)
+ continue;
+ has_avl_out = true;
+ if (!bitmap_bit_p (m_avl_def_in[next_insn->bb ()->index ()],
+ expr_id))
+ return false;
+ }
+ /* If avl is not in the avl_out of prev_bb. */
+ if (!has_avl_out)
+ return false;
+ }
-/* Return TRUE if the incoming vector configuration state
- to CFG_BB is compatible with the vector configuration
- state in CFG_BB, FALSE otherwise. */
-bool
-vector_infos_manager::all_avail_in_compatible_p (const basic_block cfg_bb) const
-{
- const auto &info = vector_block_infos[cfg_bb->index].local_dem;
- sbitmap avin = vector_avin[cfg_bb->index];
- unsigned int bb_index;
- sbitmap_iterator sbi;
- EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi)
- {
- const auto &avin_info
- = static_cast<const vl_vtype_info &> (*vector_exprs[bb_index]);
- if (!info.compatible_p (avin_info))
- return false;
- }
- return true;
-}
+ /* If this is an original info, check whether any instruction before next_insn modifies avl or uses vl. */
+ for (insn_info *i = next_insn; i != next_insn->bb ()->head_insn ();
+ i = i->prev_nondebug_insn ())
+ {
+ // no def or use of vl
+ if (!ignore_vl && def_or_use_vl_p (i, info))
+ return false;
-bool
-vector_infos_manager::all_same_avl_p (const basic_block cfg_bb,
- sbitmap bitdata) const
-{
- if (bitmap_empty_p (bitdata))
- return false;
+ // no def of avl
+ if (def_avl_p (i, info))
+ return false;
+ }
+
+ /* If this is an original info, check whether any instruction after prev_insn modifies avl or uses vl. */
+ for (insn_info *i = prev_insn->bb ()->end_insn (); i != prev_insn;
+ i = i->prev_nondebug_insn ())
+ {
+ // no def or use of vl
+ if (!ignore_vl && def_or_use_vl_p (i, info))
+ return false;
- const auto &block_info = vector_block_infos[cfg_bb->index];
- if (!block_info.local_dem.demand_p (DEMAND_AVL))
+ // no def of avl
+ if (def_avl_p (i, info))
+ return false;
+ }
+ }
return true;
+ }
+
+ bool compatible_sew_lmul_with (const vsetvl_info &prev,
+ const vsetvl_info &next)
+ {
+ gcc_assert (prev.valid_p () && next.valid_p ());
+ sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand ();
+ sew_lmul_demand_type next_flags = next.get_sew_lmul_demand ();
+#define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
+ AVAILABLE_P, FUSE) \
+ if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \
+ && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \
+ return COMPATIBLE_P (prev, next);
- avl_info avl = block_info.local_dem.get_avl_info ();
- unsigned int bb_index;
- sbitmap_iterator sbi;
+#include "riscv-vsetvl.def"
- EXECUTE_IF_SET_IN_BITMAP (bitdata, 0, bb_index, sbi)
- {
- if (vector_exprs[bb_index]->get_avl_info () != avl)
- return false;
- }
- return true;
-}
+ gcc_unreachable ();
+ }
+
+ bool available_sew_lmul_with (const vsetvl_info &prev,
+ const vsetvl_info &next)
+ {
+ gcc_assert (prev.valid_p () && next.valid_p ());
+ sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand ();
+ sew_lmul_demand_type next_flags = next.get_sew_lmul_demand ();
+#define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
+ AVAILABLE_P, FUSE) \
+ if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \
+ && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \
+ return AVAILABLE_P (prev, next);
-bool
-vector_infos_manager::earliest_fusion_worthwhile_p (
- const basic_block cfg_bb) const
-{
- edge e;
- edge_iterator ei;
- profile_probability prob = profile_probability::uninitialized ();
- FOR_EACH_EDGE (e, ei, cfg_bb->succs)
- {
- if (prob == profile_probability::uninitialized ())
- prob = vector_block_infos[e->dest->index].probability;
- else if (prob == vector_block_infos[e->dest->index].probability)
- continue;
- else
- /* We pick the highest probability among those incompatible VSETVL
- infos. When all incompatible VSTEVL infos have same probability, we
- don't pick any of them. */
- return true;
- }
- return false;
-}
+#include "riscv-vsetvl.def"
-bool
-vector_infos_manager::vsetvl_dominated_by_all_preds_p (
- const basic_block cfg_bb, const vector_insn_info &info) const
-{
- edge e;
- edge_iterator ei;
- FOR_EACH_EDGE (e, ei, cfg_bb->preds)
- {
- const auto &reaching_out = vector_block_infos[e->src->index].reaching_out;
- if (e->src->index == cfg_bb->index && reaching_out.compatible_p (info))
- continue;
- if (!vsetvl_dominated_by_p (e->src, info, reaching_out, false))
- return false;
+ gcc_unreachable ();
+ }
+
+ void merge_sew_lmul_with (vsetvl_info &prev, const vsetvl_info &next)
+ {
+ gcc_assert (prev.valid_p () && next.valid_p ());
+ sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand ();
+ sew_lmul_demand_type next_flags = next.get_sew_lmul_demand ();
+#define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
+ AVAILABLE_P, FUSE) \
+ if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \
+ && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \
+ { \
+ gcc_assert (COMPATIBLE_P (prev, next)); \
+ FUSE (prev, next); \
+ prev.set_sew_lmul_demand (sew_lmul_demand_type::NEW_FLAGS); \
+ return; \
}
- return true;
-}
-size_t
-vector_infos_manager::expr_set_num (sbitmap bitdata) const
-{
- size_t count = 0;
- for (size_t i = 0; i < vector_exprs.length (); i++)
- if (bitmap_bit_p (bitdata, i))
- count++;
- return count;
-}
+#include "riscv-vsetvl.def"
-void
-vector_infos_manager::release (void)
-{
- if (!vector_insn_infos.is_empty ())
- vector_insn_infos.release ();
- if (!vector_block_infos.is_empty ())
- vector_block_infos.release ();
- if (!vector_exprs.is_empty ())
- vector_exprs.release ();
-
- gcc_assert (to_refine_vsetvls.is_empty ());
- gcc_assert (to_delete_vsetvls.is_empty ());
- if (optimize > 0)
- free_bitmap_vectors ();
-}
+ gcc_unreachable ();
+ }
-void
-vector_infos_manager::create_bitmap_vectors (void)
-{
- /* Create the bitmap vectors. */
- vector_antic = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
- vector_exprs.length ());
- vector_transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
- vector_exprs.length ());
- vector_comp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
- vector_exprs.length ());
- vector_avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
- vector_exprs.length ());
- vector_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
- vector_exprs.length ());
- vector_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
- vector_exprs.length ());
- vector_antin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
- vector_exprs.length ());
- vector_antout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
- vector_exprs.length ());
-
- bitmap_vector_ones (vector_transp, last_basic_block_for_fn (cfun));
- bitmap_vector_clear (vector_antic, last_basic_block_for_fn (cfun));
- bitmap_vector_clear (vector_comp, last_basic_block_for_fn (cfun));
- vector_edge_list = create_edge_list ();
- vector_earliest = sbitmap_vector_alloc (NUM_EDGES (vector_edge_list),
- vector_exprs.length ());
-}
+ bool compatible_policy_with (const vsetvl_info &prev, const vsetvl_info &next)
+ {
+ gcc_assert (prev.valid_p () && next.valid_p ());
+ policy_demand_type prev_flags = prev.get_policy_demand ();
+ policy_demand_type next_flags = next.get_policy_demand ();
+#define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
+ AVAILABLE_P, FUSE) \
+ if (prev_flags == policy_demand_type::PREV_FLAGS \
+ && next_flags == policy_demand_type::NEXT_FLAGS) \
+ return COMPATIBLE_P (prev, next);
-void
-vector_infos_manager::free_bitmap_vectors (void)
-{
- /* Finished. Free up all the things we've allocated. */
- free_edge_list (vector_edge_list);
- if (vector_del)
- sbitmap_vector_free (vector_del);
- if (vector_insert)
- sbitmap_vector_free (vector_insert);
- if (vector_kill)
- sbitmap_vector_free (vector_kill);
- if (vector_antic)
- sbitmap_vector_free (vector_antic);
- if (vector_transp)
- sbitmap_vector_free (vector_transp);
- if (vector_comp)
- sbitmap_vector_free (vector_comp);
- if (vector_avin)
- sbitmap_vector_free (vector_avin);
- if (vector_avout)
- sbitmap_vector_free (vector_avout);
- if (vector_antin)
- sbitmap_vector_free (vector_antin);
- if (vector_antout)
- sbitmap_vector_free (vector_antout);
- if (vector_earliest)
- sbitmap_vector_free (vector_earliest);
-
- vector_edge_list = nullptr;
- vector_kill = nullptr;
- vector_del = nullptr;
- vector_insert = nullptr;
- vector_antic = nullptr;
- vector_transp = nullptr;
- vector_comp = nullptr;
- vector_avin = nullptr;
- vector_avout = nullptr;
- vector_antin = nullptr;
- vector_antout = nullptr;
- vector_earliest = nullptr;
-}
+#include "riscv-vsetvl.def"
-void
-vector_infos_manager::dump (FILE *file) const
-{
- basic_block cfg_bb;
- rtx_insn *rinsn;
+ gcc_unreachable ();
+ }
- fprintf (file, "\n");
- FOR_ALL_BB_FN (cfg_bb, cfun)
- {
- fprintf (file, "Local vector info of <bb %d>:\n", cfg_bb->index);
- fprintf (file, "<HEADER>=");
- vector_block_infos[cfg_bb->index].local_dem.dump (file);
- FOR_BB_INSNS (cfg_bb, rinsn)
- {
- if (!NONDEBUG_INSN_P (rinsn) || !has_vtype_op (rinsn))
- continue;
- fprintf (file, "<insn %d>=", INSN_UID (rinsn));
- const auto &info = vector_insn_infos[INSN_UID (rinsn)];
- info.dump (file);
- }
- fprintf (file, "<FOOTER>=");
- vector_block_infos[cfg_bb->index].reaching_out.dump (file);
- fprintf (file, "<Probability>=");
- vector_block_infos[cfg_bb->index].probability.dump (file);
- fprintf (file, "\n\n");
+ bool available_policy_with (const vsetvl_info &prev, const vsetvl_info &next)
+ {
+ gcc_assert (prev.valid_p () && next.valid_p ());
+ policy_demand_type prev_flags = prev.get_policy_demand ();
+ policy_demand_type next_flags = next.get_policy_demand ();
+#define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
+ AVAILABLE_P, FUSE) \
+ if (prev_flags == policy_demand_type::PREV_FLAGS \
+ && next_flags == policy_demand_type::NEXT_FLAGS) \
+ return AVAILABLE_P (prev, next);
+
+#include "riscv-vsetvl.def"
+
+ gcc_unreachable ();
+ }
+
+ void merge_policy_with (vsetvl_info &prev, const vsetvl_info &next)
+ {
+ gcc_assert (prev.valid_p () && next.valid_p ());
+ policy_demand_type prev_flags = prev.get_policy_demand ();
+ policy_demand_type next_flags = next.get_policy_demand ();
+#define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
+ AVAILABLE_P, FUSE) \
+ if (prev_flags == policy_demand_type::PREV_FLAGS \
+ && next_flags == policy_demand_type::NEXT_FLAGS) \
+ { \
+ gcc_assert (COMPATIBLE_P (prev, next)); \
+ FUSE (prev, next); \
+ prev.set_policy_demand (policy_demand_type::NEW_FLAGS); \
+ return; \
}
- fprintf (file, "\n");
- FOR_ALL_BB_FN (cfg_bb, cfun)
- {
- fprintf (file, "Local properties of <bb %d>:\n", cfg_bb->index);
+#include "riscv-vsetvl.def"
- fprintf (file, "<ANTLOC>=");
- if (vector_antic == nullptr)
- fprintf (file, "(nil)\n");
- else
- dump_bitmap_file (file, vector_antic[cfg_bb->index]);
+ gcc_unreachable ();
+ }
- fprintf (file, "<AVLOC>=");
- if (vector_comp == nullptr)
- fprintf (file, "(nil)\n");
- else
- dump_bitmap_file (file, vector_comp[cfg_bb->index]);
+ bool compatible_avl_with (const vsetvl_info &prev, const vsetvl_info &next)
+ {
+ gcc_assert (prev.valid_p () && next.valid_p ());
+ avl_demand_type prev_flags = prev.get_avl_demand ();
+ avl_demand_type next_flags = next.get_avl_demand ();
+#define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
+ AVAILABLE_P, FUSE) \
+ if (prev_flags == avl_demand_type::PREV_FLAGS \
+ && next_flags == avl_demand_type::NEXT_FLAGS) \
+ return COMPATIBLE_P (prev, next);
- fprintf (file, "<TRANSP>=");
- if (vector_transp == nullptr)
- fprintf (file, "(nil)\n");
- else
- dump_bitmap_file (file, vector_transp[cfg_bb->index]);
+#include "riscv-vsetvl.def"
- fprintf (file, "<KILL>=");
- if (vector_kill == nullptr)
- fprintf (file, "(nil)\n");
- else
- dump_bitmap_file (file, vector_kill[cfg_bb->index]);
+ gcc_unreachable ();
+ }
- fprintf (file, "<ANTIN>=");
- if (vector_antin == nullptr)
- fprintf (file, "(nil)\n");
- else
- dump_bitmap_file (file, vector_antin[cfg_bb->index]);
+ bool available_avl_with (const vsetvl_info &prev, const vsetvl_info &next)
+ {
+ gcc_assert (prev.valid_p () && next.valid_p ());
+ avl_demand_type prev_flags = prev.get_avl_demand ();
+ avl_demand_type next_flags = next.get_avl_demand ();
+#define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
+ AVAILABLE_P, FUSE) \
+ if (prev_flags == avl_demand_type::PREV_FLAGS \
+ && next_flags == avl_demand_type::NEXT_FLAGS) \
+ return AVAILABLE_P (prev, next);
- fprintf (file, "<ANTOUT>=");
- if (vector_antout == nullptr)
- fprintf (file, "(nil)\n");
- else
- dump_bitmap_file (file, vector_antout[cfg_bb->index]);
- }
+#include "riscv-vsetvl.def"
- fprintf (file, "\n");
- FOR_ALL_BB_FN (cfg_bb, cfun)
- {
- fprintf (file, "Global LCM (Lazy code motion) result of <bb %d>:\n",
- cfg_bb->index);
+ gcc_unreachable ();
+ }
+
+ void merge_avl_with (vsetvl_info &prev, const vsetvl_info &next)
+ {
+ gcc_assert (prev.valid_p () && next.valid_p ());
+ avl_demand_type prev_flags = prev.get_avl_demand ();
+ avl_demand_type next_flags = next.get_avl_demand ();
+#define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
+ AVAILABLE_P, FUSE) \
+ if (prev_flags == avl_demand_type::PREV_FLAGS \
+ && next_flags == avl_demand_type::NEXT_FLAGS) \
+ { \
+ gcc_assert (COMPATIBLE_P (prev, next)); \
+ FUSE (prev, next); \
+ prev.set_avl_demand (avl_demand_type::NEW_FLAGS); \
+ return; \
+ }
- fprintf (file, "<AVIN>=");
- if (vector_avin == nullptr)
- fprintf (file, "(nil)\n");
- else
- dump_bitmap_file (file, vector_avin[cfg_bb->index]);
+#include "riscv-vsetvl.def"
- fprintf (file, "<AVOUT>=");
- if (vector_avout == nullptr)
- fprintf (file, "(nil)\n");
- else
- dump_bitmap_file (file, vector_avout[cfg_bb->index]);
+ gcc_unreachable ();
+ }
+
+ bool compatible_with (const vsetvl_info &prev, const vsetvl_info &next)
+ {
+ bool compatible_p = compatible_sew_lmul_with (prev, next)
+ && compatible_policy_with (prev, next)
+ && compatible_avl_with (prev, next);
+ return compatible_p;
+ }
+
+ bool available_with (const vsetvl_info &prev, const vsetvl_info &next)
+ {
+ bool available_p = available_sew_lmul_with (prev, next)
+ && available_policy_with (prev, next)
+ && available_avl_with (prev, next);
+ gcc_assert (!available_p || compatible_with (prev, next));
+ return available_p;
+ }
+
+ void merge_with (vsetvl_info &prev, const vsetvl_info &next)
+ {
+ gcc_assert (compatible_with (prev, next));
+ merge_sew_lmul_with (prev, next);
+ merge_policy_with (prev, next);
+ merge_avl_with (prev, next);
+ gcc_assert (available_with (prev, next));
+ }
+};
- fprintf (file, "<DELETE>=");
- if (vector_del == nullptr)
- fprintf (file, "(nil)\n");
- else
- dump_bitmap_file (file, vector_del[cfg_bb->index]);
- }
+/* Emit vsetvl instruction. */
+static rtx
+gen_vsetvl_pat (enum vsetvl_type insn_type, const vsetvl_info &info, rtx vl)
+{
+ rtx avl = info.get_avl ();
+ /* if optimization == 0 and the instruction is vmv.x.s/vfmv.f.s,
+ set the value of avl to (const_int 0) so that VSETVL PASS will
+ insert vsetvl correctly.*/
+ if (!info.get_avl ())
+ avl = GEN_INT (0);
+ rtx sew = gen_int_mode (info.get_sew (), Pmode);
+ rtx vlmul = gen_int_mode (info.get_vlmul (), Pmode);
+ rtx ta = gen_int_mode (info.get_ta (), Pmode);
+ rtx ma = gen_int_mode (info.get_ma (), Pmode);
- for (size_t i = 0; i < vector_exprs.length (); i++)
+ if (insn_type == VSETVL_NORMAL)
{
- for (int ed = 0; ed < NUM_EDGES (vector_edge_list); ed++)
- {
- edge eg = INDEX_EDGE (vector_edge_list, ed);
- if (vector_insert)
- {
- if (bitmap_bit_p (vector_insert[ed], i))
- {
- fprintf (file,
- "\nGlobal LCM (Lazy code motion) INSERT info:\n");
- fprintf (file,
- "INSERT edge %d from <bb %d> to <bb %d> for VSETVL "
- "expr[%ld]\n",
- ed, eg->src->index, eg->dest->index, i);
- }
- }
- else
- {
- if (bitmap_bit_p (vector_earliest[ed], i))
- {
- fprintf (file,
- "\nGlobal LCM (Lazy code motion) EARLIEST info:\n");
- fprintf (
- file,
- "EARLIEST edge %d from <bb %d> to <bb %d> for VSETVL "
- "expr[%ld]\n",
- ed, eg->src->index, eg->dest->index, i);
- }
- }
- }
+ gcc_assert (vl != NULL_RTX);
+ return gen_vsetvl (Pmode, vl, avl, sew, vlmul, ta, ma);
}
+ else if (insn_type == VSETVL_VTYPE_CHANGE_ONLY)
+ return gen_vsetvl_vtype_change_only (sew, vlmul, ta, ma);
+ else
+ return gen_vsetvl_discard_result (Pmode, avl, sew, vlmul, ta, ma);
}
-const pass_data pass_data_vsetvl = {
- RTL_PASS, /* type */
- "vsetvl", /* name */
- OPTGROUP_NONE, /* optinfo_flags */
- TV_NONE, /* tv_id */
- 0, /* properties_required */
- 0, /* properties_provided */
- 0, /* properties_destroyed */
- 0, /* todo_flags_start */
- 0, /* todo_flags_finish */
-};
-
-class pass_vsetvl : public rtl_opt_pass
+class vsetvl_block_info
{
-private:
- vector_infos_manager *m_vector_manager;
-
- const vector_insn_info &get_vector_info (const rtx_insn *) const;
- const vector_insn_info &get_vector_info (const insn_info *) const;
- const vector_block_info &get_block_info (const basic_block) const;
- const vector_block_info &get_block_info (const bb_info *) const;
- vector_block_info &get_block_info (const basic_block);
- vector_block_info &get_block_info (const bb_info *);
- void update_vector_info (const insn_info *, const vector_insn_info &);
- void update_block_info (int, profile_probability, const vector_insn_info &);
-
- void simple_vsetvl (void) const;
- void lazy_vsetvl (void);
-
- /* Phase 1. */
- void compute_local_backward_infos (const bb_info *);
-
- /* Phase 2. */
- bool need_vsetvl (const vector_insn_info &, const vector_insn_info &) const;
- void transfer_before (vector_insn_info &, insn_info *) const;
- void transfer_after (vector_insn_info &, insn_info *) const;
- void emit_local_forward_vsetvls (const bb_info *);
-
- /* Phase 3. */
- bool earliest_fusion (void);
- void vsetvl_fusion (void);
-
- /* Phase 4. */
- void prune_expressions (void);
- void compute_local_properties (void);
- bool can_refine_vsetvl_p (const basic_block, const vector_insn_info &) const;
- void refine_vsetvls (void) const;
- void cleanup_vsetvls (void);
- bool commit_vsetvls (void);
- void pre_vsetvl (void);
-
- /* Phase 5. */
- rtx_insn *get_vsetvl_at_end (const bb_info *, vector_insn_info *) const;
- void local_eliminate_vsetvl_insn (const bb_info *) const;
- bool global_eliminate_vsetvl_insn (const bb_info *) const;
- void ssa_post_optimization (void) const;
-
- /* Phase 6. */
- void df_post_optimization (void) const;
-
- void init (void);
- void done (void);
- void compute_probabilities (void);
-
public:
- pass_vsetvl (gcc::context *ctxt) : rtl_opt_pass (pass_data_vsetvl, ctxt) {}
-
- /* opt_pass methods: */
- virtual bool gate (function *) final override { return TARGET_VECTOR; }
- virtual unsigned int execute (function *) final override;
-}; // class pass_vsetvl
+ /* The static execute probability of the demand info. */
+ profile_probability probability;
-const vector_insn_info &
-pass_vsetvl::get_vector_info (const rtx_insn *i) const
-{
- return m_vector_manager->vector_insn_infos[INSN_UID (i)];
-}
+ auto_vec<vsetvl_info> infos;
+ vsetvl_info m_info;
+ bb_info *m_bb;
-const vector_insn_info &
-pass_vsetvl::get_vector_info (const insn_info *i) const
-{
- return m_vector_manager->vector_insn_infos[i->uid ()];
-}
+ /* Indicates that the first vsetvl_info in this block_info can be safely
+ deleted, because every avin is available to it. */
+ bool full_available;
+
+ vsetvl_block_info () : m_bb (nullptr), full_available (false)
+ {
+ infos.safe_grow_cleared (0);
+ m_info.set_empty ();
+ }
+  vsetvl_block_info (const vsetvl_block_info &other)
+    : probability (other.probability), infos (other.infos.copy ()),
+      m_info (other.m_info), m_bb (other.m_bb),
+      full_available (other.full_available) {}
+
+ vsetvl_info &get_header_info ()
+ {
+ gcc_assert (!empty_p ());
+ return infos.is_empty () ? m_info : infos[0];
+ }
+ vsetvl_info &get_footer_info ()
+ {
+ gcc_assert (!empty_p ());
+ return infos.is_empty () ? m_info : infos[infos.length () - 1];
+ }
+ const vsetvl_info &get_header_info () const
+ {
+ gcc_assert (!empty_p ());
+ return infos.is_empty () ? m_info : infos[0];
+ }
+ const vsetvl_info &get_footer_info () const
+ {
+ gcc_assert (!empty_p ());
+ return infos.is_empty () ? m_info : infos[infos.length () - 1];
+ }
+
+ bool empty_p () const { return infos.is_empty () && !has_info (); }
+ bool has_info () const { return !m_info.empty_p (); }
+ void set_info (const vsetvl_info &info)
+ {
+ gcc_assert (infos.is_empty ());
+ m_info = info;
+ m_info.set_bb (m_bb);
+ }
+ void set_empty_info () { m_info.set_empty (); }
+};
-const vector_block_info &
-pass_vsetvl::get_block_info (const basic_block bb) const
+static rtx
+gen_vsetvl_pat (rtx_insn *rinsn, const vsetvl_info &info, rtx vl = NULL_RTX)
{
- return m_vector_manager->vector_block_infos[bb->index];
-}
+ rtx new_pat;
+ vsetvl_info new_info = info;
+ /* For vmv.x.s, use 0 for avl. */
+ if (!info.get_avl ())
+ {
+ new_info.set_avl (const0_rtx);
+ new_info.set_avl_def (nullptr);
+ }
-const vector_block_info &
-pass_vsetvl::get_block_info (const bb_info *bb) const
-{
- return m_vector_manager->vector_block_infos[bb->index ()];
+ if (vl)
+ new_pat = gen_vsetvl_pat (VSETVL_NORMAL, new_info, vl);
+ else
+ {
+ if (vsetvl_insn_p (rinsn) && !info.change_vtype_only_p ())
+ new_pat = gen_vsetvl_pat (VSETVL_NORMAL, new_info, get_vl (rinsn));
+ else if (info.change_vtype_only_p ()
+ || INSN_CODE (rinsn) == CODE_FOR_vsetvl_vtype_change_only)
+ new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, new_info, NULL_RTX);
+ else
+ new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, new_info, NULL_RTX);
+ }
+ return new_pat;
}
-vector_block_info &
-pass_vsetvl::get_block_info (const basic_block bb)
+static void
+emit_vsetvl_insn (enum vsetvl_type insn_type, enum emit_type emit_type,
+ const vsetvl_info &info, rtx vl, rtx_insn *rinsn)
{
- return m_vector_manager->vector_block_infos[bb->index];
-}
+ rtx pat = gen_vsetvl_pat (insn_type, info, vl);
-vector_block_info &
-pass_vsetvl::get_block_info (const bb_info *bb)
-{
- return m_vector_manager->vector_block_infos[bb->index ()];
+ if (emit_type == EMIT_DIRECT)
+ {
+ emit_insn (pat);
+ if (dump_file)
+ {
+ fprintf (dump_file, " Insert vsetvl insn %d:\n",
+ INSN_UID (get_last_insn ()));
+ print_rtl_single (dump_file, get_last_insn ());
+ }
+ }
+ else if (emit_type == EMIT_BEFORE)
+ {
+ emit_insn_before (pat, rinsn);
+ if (dump_file)
+ {
+ fprintf (dump_file, " Insert vsetvl insn before insn %d:\n",
+ INSN_UID (rinsn));
+ print_rtl_single (dump_file, PREV_INSN (rinsn));
+ }
+ }
+ else
+ {
+ emit_insn_after (pat, rinsn);
+ if (dump_file)
+ {
+ fprintf (dump_file, " Insert vsetvl insn after insn %d:\n",
+ INSN_UID (rinsn));
+ print_rtl_single (dump_file, NEXT_INSN (rinsn));
+ }
+ }
}
-void
-pass_vsetvl::update_vector_info (const insn_info *i,
- const vector_insn_info &new_info)
+static void
+eliminate_insn (rtx_insn *rinsn)
{
- m_vector_manager->vector_insn_infos[i->uid ()] = new_info;
+ if (dump_file)
+ {
+ fprintf (dump_file, " Eliminate insn %d:\n", INSN_UID (rinsn));
+ print_rtl_single (dump_file, rinsn);
+ }
+ if (in_sequence_p ())
+ remove_insn (rinsn);
+ else
+ delete_insn (rinsn);
}
-void
-pass_vsetvl::update_block_info (int index, profile_probability prob,
- const vector_insn_info &new_info)
+/* Change insn and assert that the change always succeeds. */
+static void
+validate_change_or_fail (rtx object, rtx *loc, rtx new_rtx, bool in_group)
{
- m_vector_manager->vector_block_infos[index].probability = prob;
- if (m_vector_manager->vector_block_infos[index].local_dem
- == m_vector_manager->vector_block_infos[index].reaching_out)
- m_vector_manager->vector_block_infos[index].local_dem = new_info;
- m_vector_manager->vector_block_infos[index].reaching_out = new_info;
+ bool change_p = validate_change (object, loc, new_rtx, in_group);
+ gcc_assert (change_p);
}
-/* Simple m_vsetvl_insert vsetvl for optimize == 0. */
-void
-pass_vsetvl::simple_vsetvl (void) const
+static void
+change_insn (rtx_insn *rinsn, rtx new_pat)
{
+ /* We don't apply the change through RTL_SSA here, since RINSN may
+ be a new insn added earlier in this pass which doesn't have RTL_SSA
+ info yet. */
if (dump_file)
- fprintf (dump_file,
- "\nEntering Simple VSETVL PASS and Handling %d basic blocks for "
- "function:%s\n",
- n_basic_blocks_for_fn (cfun), function_name (cfun));
-
- basic_block cfg_bb;
- rtx_insn *rinsn;
- FOR_ALL_BB_FN (cfg_bb, cfun)
{
- FOR_BB_INSNS (cfg_bb, rinsn)
- {
- if (!NONDEBUG_INSN_P (rinsn))
- continue;
- if (has_vtype_op (rinsn))
- {
- const auto info = get_vector_info (rinsn);
- emit_vsetvl_insn (VSETVL_DISCARD_RESULT, EMIT_BEFORE, info,
- NULL_RTX, rinsn);
- }
- }
+ fprintf (dump_file, " Change insn %d from:\n", INSN_UID (rinsn));
+ print_rtl_single (dump_file, rinsn);
}
-}
-
-/* Compute demanded information by backward data-flow analysis. */
-void
-pass_vsetvl::compute_local_backward_infos (const bb_info *bb)
-{
- vector_insn_info change;
- change.set_empty ();
- auto &block_info = m_vector_manager->vector_block_infos[bb->index ()];
- block_info.reaching_out = change;
+ validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, false);
- for (insn_info *insn : bb->reverse_real_nondebug_insns ())
+ if (dump_file)
{
- auto &info = get_vector_info (insn);
-
- if (info.uninit_p ())
- /* If it is uninitialized, propagate it directly. */
- update_vector_info (insn, change);
- else if (info.unknown_p ())
- change = info;
- else
- {
- gcc_assert (info.valid_p () && "Unexpected Invalid demanded info");
- if (change.valid_p ())
- {
- if (!(propagate_avl_across_demands_p (change, info)
- && !reg_available_p (insn, change))
- && change.compatible_p (info))
- {
- update_vector_info (insn, change.local_merge (info));
- /* Fix PR109399, we should update user vsetvl instruction
- if there is a change in demand fusion. */
- if (vsetvl_insn_p (insn->rtl ()))
- change_vsetvl_insn (insn, info);
- }
- }
- change = info;
- }
+ fprintf (dump_file, "\n to:\n");
+ print_rtl_single (dump_file, rinsn);
}
-
- block_info.local_dem = change;
- if (block_info.local_dem.empty_p ())
- block_info.reaching_out = block_info.local_dem;
}
-/* Return true if a dem_info is required to transition from curr_info to
- require before INSN. */
-bool
-pass_vsetvl::need_vsetvl (const vector_insn_info &require,
- const vector_insn_info &curr_info) const
+static bool
+change_insn (function_info *ssa, insn_change change, insn_info *insn,
+ rtx new_pat)
{
- if (!curr_info.valid_p () || curr_info.unknown_p () || curr_info.uninit_p ())
- return true;
+ rtx_insn *rinsn = insn->rtl ();
+ auto attempt = ssa->new_change_attempt ();
+ if (!restrict_movement (change))
+ return false;
+
+ if (dump_file)
+ {
+ fprintf (dump_file, " Change insn %d from:\n", INSN_UID (rinsn));
+ print_rtl_single (dump_file, rinsn);
+ }
- if (require.compatible_p (static_cast<const vl_vtype_info &> (curr_info)))
+ insn_change_watermark watermark;
+ validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, true);
+
+ /* These routines report failures themselves. */
+ if (!recog (attempt, change) || !change_is_worthwhile (change, false))
return false;
+ remove_reg_equal_equiv_notes (rinsn);
+ confirm_change_group ();
+ ssa->change_insn (change);
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "\n to:\n");
+ print_rtl_single (dump_file, rinsn);
+ }
+
return true;
}
-/* Given an incoming state reaching INSN, modifies that state so that it is
- minimally compatible with INSN. The resulting state is guaranteed to be
- semantically legal for INSN, but may not be the state requested by INSN. */
-void
-pass_vsetvl::transfer_before (vector_insn_info &info, insn_info *insn) const
+static void
+change_vsetvl_insn (const insn_info *insn, const vsetvl_info &info)
{
- if (!has_vtype_op (insn->rtl ()))
- return;
-
- const vector_insn_info require = get_vector_info (insn);
- if (info.valid_p () && !need_vsetvl (require, info))
- return;
- info = require;
+ rtx_insn *rinsn = insn->rtl ();
+ rtx new_pat = gen_vsetvl_pat (rinsn, info);
+ change_insn (rinsn, new_pat);
}
-/* Given a state with which we evaluated insn (see transfer_before above for why
- this might be different that the state insn requested), modify the state to
- reflect the changes insn might make. */
-void
-pass_vsetvl::transfer_after (vector_insn_info &info, insn_info *insn) const
+class pre_vsetvl
{
- if (vector_config_insn_p (insn->rtl ()))
- {
- info = get_vector_info (insn);
- return;
- }
+private:
+ demand_system dem;
+ auto_vec<vsetvl_block_info> vector_block_infos;
+
+ /* Data for avl reaching definition. */
+ sbitmap avl_regs;
+ sbitmap *avl_def_in;
+ sbitmap *avl_def_out;
+ sbitmap *reg_def_loc;
+
+ /* Data for vsetvl info reaching definition. */
+ vsetvl_info unknow_info;
+ auto_vec<vsetvl_info *> vsetvl_def_exprs;
+ sbitmap *vsetvl_def_in;
+ sbitmap *vsetvl_def_out;
+
+ /* data for lcm */
+ auto_vec<vsetvl_info *> exprs;
+ sbitmap *avloc;
+ sbitmap *avin;
+ sbitmap *avout;
+ sbitmap *kill;
+ sbitmap *antloc;
+ sbitmap *transp;
+ sbitmap *insert;
+ sbitmap *del;
+ struct edge_list *edges;
+
+ auto_vec<vsetvl_info> delete_list;
+
+ vsetvl_block_info &get_block_info (const bb_info *bb)
+ {
+ return vector_block_infos[bb->index ()];
+ }
+ const vsetvl_block_info &get_block_info (const basic_block bb) const
+ {
+ return vector_block_infos[bb->index];
+ }
+
+ vsetvl_block_info &get_block_info (const basic_block bb)
+ {
+ return vector_block_infos[bb->index];
+ }
+
+ void add_expr (auto_vec<vsetvl_info *> &exprs, vsetvl_info &info)
+ {
+ for (vsetvl_info *item : exprs)
+ {
+ if (*item == info)
+ return;
+ }
+ exprs.safe_push (&info);
+ }
+
+ unsigned get_expr_index (auto_vec<vsetvl_info *> &exprs,
+ const vsetvl_info &info)
+ {
+ for (size_t i = 0; i < exprs.length (); i += 1)
+ {
+ if (*exprs[i] == info)
+ return i;
+ }
+ gcc_unreachable ();
+ }
+
+ /* Ensure that no insn before header_info modifies avl, or modifies or uses vl. */
+ bool anticpatable_exp_p (const vsetvl_info &header_info)
+ {
+ if (!header_info.has_reg_avl () && !header_info.has_reg_vl ())
+ return true;
- if (fault_first_load_p (insn->rtl ())
- && info.update_fault_first_load_avl (insn))
- return;
+ bb_info *bb = header_info.get_bb ();
+ insn_info *prev_insn = bb->head_insn ();
+ insn_info *next_insn = header_info.get_insn ();
+ if (bb != next_insn->bb ())
+ next_insn = bb->end_insn ();
+
+ return dem.safe_move_avl_vl_p (prev_insn, next_insn, header_info);
+ }
+
+ bool available_exp_p (const vsetvl_info &prev_info,
+ const vsetvl_info &next_info)
+ {
+ return dem.available_with (prev_info, next_info);
+ }
+
+ void compute_probabilities ()
+ {
+ edge e;
+ edge_iterator ei;
+
+ for (const bb_info *bb : crtl->ssa->bbs ())
+ {
+ basic_block cfg_bb = bb->cfg_bb ();
+ auto &curr_prob = get_block_info (cfg_bb).probability;
+
+ /* GCC assumes the entry block (bb 0) is always
+ executed, so set its probability to "always". */
+ if (ENTRY_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
+ curr_prob = profile_probability::always ();
+ /* Exit block (bb 1) is the block we don't need to process. */
+ if (EXIT_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
+ continue;
- /* If this is something that updates VL/VTYPE that we don't know about, set
- the state to unknown. */
- if (insn->is_call () || insn->is_asm ()
- || find_access (insn->defs (), VL_REGNUM)
- || find_access (insn->defs (), VTYPE_REGNUM))
- info = vector_insn_info::get_unknown ();
-}
+ gcc_assert (curr_prob.initialized_p ());
+ FOR_EACH_EDGE (e, ei, cfg_bb->succs)
+ {
+ auto &new_prob = get_block_info (e->dest).probability;
+ /* Normally, the edge probability should be initialized.
+ However, some special testing code written in GIMPLE IR
+ style forces the edge probability to be uninitialized;
+ we conservatively set it to never so that it will not
+ affect PRE (Phase 3 && Phase 4). */
+ if (!e->probability.initialized_p ())
+ new_prob = profile_probability::never ();
+ else if (!new_prob.initialized_p ())
+ new_prob = curr_prob * e->probability;
+ else if (new_prob == profile_probability::always ())
+ continue;
+ else
+ new_prob += curr_prob * e->probability;
+ }
+ }
+ }
-/* Emit vsetvl within each block by forward data-flow analysis. */
-void
-pass_vsetvl::emit_local_forward_vsetvls (const bb_info *bb)
-{
- auto &block_info = m_vector_manager->vector_block_infos[bb->index ()];
- if (block_info.local_dem.empty_p ())
- return;
+ void insert_vsetvl (enum emit_type emit_type, rtx_insn *rinsn,
+ const vsetvl_info &info)
+ {
+ if (info.change_vtype_only_p ())
+ emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX,
+ rinsn);
- vector_insn_info curr_info;
- for (insn_info *insn : bb->real_nondebug_insns ())
- {
- const vector_insn_info prev_info = curr_info;
- enum vsetvl_type type = NUM_VSETVL_TYPE;
- transfer_before (curr_info, insn);
+ else if (info.has_reg_vl ())
+ emit_vsetvl_insn (VSETVL_NORMAL, emit_type, info, info.get_vl (), rinsn);
+ else
+ emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX,
+ rinsn);
+ }
- if (has_vtype_op (insn->rtl ()))
- {
- if (static_cast<const vl_vtype_info &> (prev_info)
- != static_cast<const vl_vtype_info &> (curr_info))
- {
- const auto require = get_vector_info (insn);
- if (!require.compatible_p (
- static_cast<const vl_vtype_info &> (prev_info)))
- type = insert_vsetvl (EMIT_BEFORE, insn->rtl (), require,
- prev_info);
- }
- }
+public:
+ pre_vsetvl ()
+ : avl_def_in (nullptr), avl_def_out (nullptr), vsetvl_def_in (nullptr),
+ vsetvl_def_out (nullptr), avloc (nullptr), avin (nullptr),
+ avout (nullptr), kill (nullptr), antloc (nullptr), transp (nullptr),
+ insert (nullptr), del (nullptr), edges (nullptr)
+ {
+ /* Initialization of RTL_SSA. */
+ calculate_dominance_info (CDI_DOMINATORS);
+ calculate_dominance_info (CDI_POST_DOMINATORS);
+ df_analyze ();
+ crtl->ssa = new function_info (cfun);
+ vector_block_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
+ compute_probabilities ();
+ unknow_info.set_unknown ();
+ }
+
+ void finish ()
+ {
+ free_dominance_info (CDI_DOMINATORS);
+ free_dominance_info (CDI_POST_DOMINATORS);
+ if (crtl->ssa->perform_pending_updates ())
+ cleanup_cfg (0);
+ delete crtl->ssa;
+ crtl->ssa = nullptr;
+
+ if (avl_regs)
+ sbitmap_free (avl_regs);
+ if (reg_def_loc)
+ sbitmap_vector_free (reg_def_loc);
+
+ if (avl_def_in)
+ sbitmap_vector_free (avl_def_in);
+ if (avl_def_out)
+ sbitmap_vector_free (avl_def_out);
+
+ if (vsetvl_def_in)
+ sbitmap_vector_free (vsetvl_def_in);
+ if (vsetvl_def_out)
+ sbitmap_vector_free (vsetvl_def_out);
+
+ if (avloc)
+ sbitmap_vector_free (avloc);
+ if (kill)
+ sbitmap_vector_free (kill);
+ if (antloc)
+ sbitmap_vector_free (antloc);
+ if (transp)
+ sbitmap_vector_free (transp);
+ if (insert)
+ sbitmap_vector_free (insert);
+ if (del)
+ sbitmap_vector_free (del);
+ if (avin)
+ sbitmap_vector_free (avin);
+ if (avout)
+ sbitmap_vector_free (avout);
+
+ if (edges)
+ free_edge_list (edges);
+ }
+
+ void fuse_local_vsetvl_info ();
+ bool earliest_fuse_vsetvl_info ();
+ bool compute_vsetvl_def_data ();
+ void pre_global_vsetvl_info ();
+ void emit_vsetvl ();
+ void cleaup ();
+ void remove_avl_operand ();
+ void remove_unused_dest_operand ();
+ bool preds_has_same_avl_p (const vsetvl_info &);
+ void dump (FILE *file, const char *title) const
+ {
+ fprintf (file, "\nVSETVL infos after %s\n\n", title);
+ for (const bb_info *bb : crtl->ssa->bbs ())
+ {
+ const auto &block_info = vector_block_infos[bb->index ()];
+ fprintf (file, " bb %d:\n", bb->index ());
+ fprintf (file, " probability: ");
+ block_info.probability.dump (file);
+ fprintf (file, "\n");
+ if (!block_info.empty_p ())
+ {
+ fprintf (file, " Header vsetvl info:");
+ block_info.get_header_info ().dump (file, " ");
+ fprintf (file, " Footer vsetvl info:");
+ block_info.get_footer_info ().dump (file, " ");
+ for (const auto &info : block_info.infos)
+ {
+ fprintf (file,
+ " insn %d vsetvl info:", info.get_insn ()->uid ());
+ info.dump (file, " ");
+ }
+ }
+ }
+ }
- /* Fix the issue of following sequence:
- vsetivli zero, 5
- ....
- vsetvli zero, zero
- vmv.x.s (demand AVL = 8).
- ....
- incorrect: vsetvli zero, zero ===> Since the curr_info is AVL = 8.
- correct: vsetivli zero, 8
- vadd (demand AVL = 8). */
- if (type == VSETVL_VTYPE_CHANGE_ONLY)
- {
- /* Update the curr_info to be real correct AVL. */
- curr_info.set_avl_info (prev_info.get_avl_info ());
- }
- transfer_after (curr_info, insn);
- }
+ void compute_avl_def_data ()
+ {
+ if (bitmap_empty_p (avl_regs))
+ return;
- block_info.reaching_out = curr_info;
-}
+ unsigned num_regs = GP_REG_LAST + 1;
+ unsigned num_bbs = last_basic_block_for_fn (cfun);
-/* Assemble the candidates expressions for LCM. */
-void
-pass_vsetvl::prune_expressions (void)
-{
- for (const bb_info *bb : crtl->ssa->bbs ())
- {
- if (m_vector_manager->vector_block_infos[bb->index ()]
- .local_dem.valid_or_dirty_p ())
- m_vector_manager->create_expr (
- m_vector_manager->vector_block_infos[bb->index ()].local_dem);
- if (m_vector_manager->vector_block_infos[bb->index ()]
- .reaching_out.valid_or_dirty_p ())
- m_vector_manager->create_expr (
- m_vector_manager->vector_block_infos[bb->index ()].reaching_out);
- }
+ sbitmap *avl_def_loc_temp = sbitmap_vector_alloc (num_bbs, num_regs);
+ for (const bb_info *bb : crtl->ssa->bbs ())
+ {
+ bitmap_and (avl_def_loc_temp[bb->index ()], avl_regs,
+ reg_def_loc[bb->index ()]);
- if (dump_file)
- {
- fprintf (dump_file, "\nThe total VSETVL expression num = %d\n",
- m_vector_manager->vector_exprs.length ());
- fprintf (dump_file, "Expression List:\n");
- for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
+ vsetvl_block_info &block_info = get_block_info (bb);
+ if (block_info.has_info ())
+ {
+ vsetvl_info &footer_info = block_info.get_footer_info ();
+ gcc_assert (footer_info.valid_p ());
+ if (footer_info.has_reg_vl ())
+ bitmap_set_bit (avl_def_loc_temp[bb->index ()],
+ REGNO (footer_info.get_vl ()));
+ }
+ }
+
+ if (avl_def_in)
+ sbitmap_vector_free (avl_def_in);
+ if (avl_def_out)
+ sbitmap_vector_free (avl_def_out);
+
+ unsigned num_exprs = num_bbs * num_regs;
+ sbitmap *avl_def_loc = sbitmap_vector_alloc (num_bbs, num_exprs);
+ sbitmap *kill = sbitmap_vector_alloc (num_bbs, num_exprs);
+ avl_def_in = sbitmap_vector_alloc (num_bbs, num_exprs);
+ avl_def_out = sbitmap_vector_alloc (num_bbs, num_exprs);
+
+ bitmap_vector_clear (avl_def_loc, num_bbs);
+ bitmap_vector_clear (kill, num_bbs);
+ bitmap_vector_clear (avl_def_out, num_bbs);
+
+ unsigned regno;
+ sbitmap_iterator sbi;
+ for (const bb_info *bb : crtl->ssa->bbs ())
+ EXECUTE_IF_SET_IN_BITMAP (avl_def_loc_temp[bb->index ()], 0, regno, sbi)
{
- fprintf (dump_file, "Expr[%ld]:\n", i);
- m_vector_manager->vector_exprs[i]->dump (dump_file);
- fprintf (dump_file, "\n");
+ bitmap_set_bit (avl_def_loc[bb->index ()],
+ get_expr_id (bb->index (), regno, num_bbs));
+ bitmap_set_range (kill[bb->index ()], regno * num_bbs, num_bbs);
}
- }
-}
-/* Compute the local properties of each recorded expression.
+ basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);
+ EXECUTE_IF_SET_IN_BITMAP (avl_regs, 0, regno, sbi)
+ bitmap_set_bit (avl_def_out[entry->index],
+ get_expr_id (entry->index, regno, num_bbs));
+
+ compute_reaching_defintion (avl_def_loc, kill, avl_def_in, avl_def_out);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file,
+ " Compute avl reaching defition data (num_bbs %d, num_regs "
+ "%d):\n\n",
+ num_bbs, num_regs);
+ fprintf (dump_file, " avl_regs: ");
+ dump_bitmap_file (dump_file, avl_regs);
+ fprintf (dump_file, "\n bitmap data:\n");
+ for (const bb_info *bb : crtl->ssa->bbs ())
+ {
+ unsigned int i = bb->index ();
+ fprintf (dump_file, " BB %u:\n", i);
+ fprintf (dump_file, " avl_def_loc:");
+ unsigned expr_id;
+ sbitmap_iterator sbi;
+ EXECUTE_IF_SET_IN_BITMAP (avl_def_loc[i], 0, expr_id, sbi)
+ {
+ fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs),
+ get_bb_index (expr_id, num_bbs));
+ }
+ fprintf (dump_file, "\n kill:");
+ EXECUTE_IF_SET_IN_BITMAP (kill[i], 0, expr_id, sbi)
+ {
+ fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs),
+ get_bb_index (expr_id, num_bbs));
+ }
+ fprintf (dump_file, "\n avl_def_in:");
+ EXECUTE_IF_SET_IN_BITMAP (avl_def_in[i], 0, expr_id, sbi)
+ {
+ fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs),
+ get_bb_index (expr_id, num_bbs));
+ }
+ fprintf (dump_file, "\n avl_def_out:");
+ EXECUTE_IF_SET_IN_BITMAP (avl_def_out[i], 0, expr_id, sbi)
+ {
+ fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs),
+ get_bb_index (expr_id, num_bbs));
+ }
+ fprintf (dump_file, "\n");
+ }
+ }
+
+ sbitmap_vector_free (avl_def_loc);
+ sbitmap_vector_free (kill);
+ sbitmap_vector_free (avl_def_loc_temp);
+
+ dem.set_avl_in_out_data (avl_def_in, avl_def_out);
+ }
+ void compute_vsetvl_lcm_data ()
+ {
+ exprs.truncate (0);
+ for (const bb_info *bb : crtl->ssa->bbs ())
+ {
+ vsetvl_block_info &block_info = get_block_info (bb);
+ if (block_info.empty_p ())
+ continue;
+ vsetvl_info &header_info = block_info.get_header_info ();
+ vsetvl_info &footer_info = block_info.get_footer_info ();
+ gcc_assert (footer_info.valid_p () || footer_info.unknown_p ());
+ add_expr (exprs, header_info);
+ add_expr (exprs, footer_info);
+ }
+
+ int num_exprs = exprs.length ();
+ if (avloc)
+ sbitmap_vector_free (avloc);
+ if (kill)
+ sbitmap_vector_free (kill);
+ if (antloc)
+ sbitmap_vector_free (antloc);
+ if (transp)
+ sbitmap_vector_free (transp);
+ if (avin)
+ sbitmap_vector_free (avin);
+ if (avout)
+ sbitmap_vector_free (avout);
+
+ avloc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
+ kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
+ antloc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
+ transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
+ avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
+ avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
+
+ bitmap_vector_clear (avloc, last_basic_block_for_fn (cfun));
+ bitmap_vector_clear (antloc, last_basic_block_for_fn (cfun));
+ bitmap_vector_clear (transp, last_basic_block_for_fn (cfun));
+
+ for (const bb_info *bb : crtl->ssa->bbs ())
+ {
+ unsigned bb_index = bb->index ();
+ vsetvl_block_info &block_info = get_block_info (bb);
+
+ /* Compute transp */
+ if (block_info.empty_p ())
+ {
+ bitmap_ones (transp[bb_index]);
+ for (int i = 0; i < num_exprs; i += 1)
+ {
+ const vsetvl_info &info = *exprs[i];
+ if (!info.has_reg_avl () && !info.has_reg_vl ())
+ continue;
+
+ unsigned int regno;
+ sbitmap_iterator sbi;
+ EXECUTE_IF_SET_IN_BITMAP (reg_def_loc[bb->index ()], 0, regno,
+ sbi)
+ {
+ if (regno == REGNO (info.get_avl ()))
+ bitmap_clear_bit (transp[bb->index ()], i);
+ }
+
+ for (const insn_info *insn : bb->real_nondebug_insns ())
+ {
+ if ((info.has_reg_avl ()
+ && find_access (insn->defs (),
+ REGNO (info.get_avl ())))
+ || (info.has_reg_vl ()
+ && find_access (insn->uses (),
+ REGNO (info.get_vl ()))))
+ {
+ bitmap_clear_bit (transp[bb_index], i);
+ break;
+ }
+ }
+ }
+
+ continue;
+ }
- Local properties are those that are defined by the block, irrespective of
- other blocks.
+ vsetvl_info &header_info = block_info.get_header_info ();
+ vsetvl_info &footer_info = block_info.get_footer_info ();
- An expression is transparent in a block if its operands are not modified
- in the block.
+ if (header_info.valid_p ()
+ && (anticpatable_exp_p (header_info) || block_info.full_available))
+ bitmap_set_bit (antloc[bb_index],
+ get_expr_index (exprs, header_info));
- An expression is computed (locally available) in a block if it is computed
- at least once and expression would contain the same value if the
- computation was moved to the end of the block.
+ if (footer_info.valid_p ())
+ for (int i = 0; i < num_exprs; i += 1)
+ {
+ const vsetvl_info &info = *exprs[i];
+ if (!info.valid_p ())
+ continue;
+ if (available_exp_p (footer_info, info))
+ bitmap_set_bit (avloc[bb_index], i);
+ }
+ }
+
+ for (const bb_info *bb : crtl->ssa->bbs ())
+ {
+ unsigned bb_index = bb->index ();
+ bitmap_ior (kill[bb_index], transp[bb_index], avloc[bb_index]);
+ bitmap_not (kill[bb_index], kill[bb_index]);
+ }
+
+ for (const bb_info *bb : crtl->ssa->bbs ())
+ {
+ unsigned bb_index = bb->index ();
+ edge e;
+ edge_iterator ei;
+ FOR_EACH_EDGE (e, ei, bb->cfg_bb ()->preds)
+ if (e->flags & EDGE_COMPLEX)
+ {
+ bitmap_clear (antloc[bb_index]);
+ bitmap_clear (transp[bb_index]);
+ }
+ }
+ }
+
+ bool earliest_fusion_worthwhile_p (const basic_block cfg_bb) const
+ {
+ edge e;
+ edge_iterator ei;
+ profile_probability prob = profile_probability::uninitialized ();
+ FOR_EACH_EDGE (e, ei, cfg_bb->succs)
+ {
+ if (prob == profile_probability::uninitialized ())
+ prob = vector_block_infos[e->dest->index].probability;
+ else if (prob == vector_block_infos[e->dest->index].probability)
+ continue;
+ else
+ /* We pick the highest probability among those incompatible VSETVL
+ infos. When all incompatible VSETVL infos have the same probability,
+ we don't pick any of them. */
+ return true;
+ }
+ return false;
+ }
+};
- An expression is locally anticipatable in a block if it is computed at
- least once and expression would contain the same value if the computation
- was moved to the beginning of the block. */
void
-pass_vsetvl::compute_local_properties (void)
+pre_vsetvl::fuse_local_vsetvl_info ()
{
- /* - If T is locally available at the end of a block, then T' must be
- available at the end of the same block. Since some optimization has
- occurred earlier, T' might not be locally available, however, it must
- have been previously computed on all paths. As a formula, T at AVLOC(B)
- implies that T' at AVOUT(B).
- An "available occurrence" is one that is the last occurrence in the
- basic block and the operands are not modified by following statements in
- the basic block [including this insn].
-
- - If T is locally anticipated at the beginning of a block, then either
- T', is locally anticipated or it is already available from previous
- blocks. As a formula, this means that T at ANTLOC(B) implies that T' at
- ANTLOC(B) at AVIN(B).
- An "anticipatable occurrence" is one that is the first occurrence in the
- basic block, the operands are not modified in the basic block prior
- to the occurrence and the output is not used between the start of
- the block and the occurrence. */
+ reg_def_loc
+ = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), GP_REG_LAST + 1);
+ bitmap_vector_clear (reg_def_loc, last_basic_block_for_fn (cfun));
+ bitmap_ones (reg_def_loc[ENTRY_BLOCK_PTR_FOR_FN (cfun)->index]);
- basic_block cfg_bb;
- for (const bb_info *bb : crtl->ssa->bbs ())
+ for (bb_info *bb : crtl->ssa->bbs ())
{
- unsigned int curr_bb_idx = bb->index ();
- if (curr_bb_idx == ENTRY_BLOCK || curr_bb_idx == EXIT_BLOCK)
- continue;
- const auto local_dem
- = m_vector_manager->vector_block_infos[curr_bb_idx].local_dem;
- const auto reaching_out
- = m_vector_manager->vector_block_infos[curr_bb_idx].reaching_out;
-
- /* Compute transparent. */
- for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
+ auto &block_info = get_block_info (bb);
+ block_info.m_bb = bb;
+ if (dump_file && (dump_flags & TDF_DETAILS))
{
- const auto *expr = m_vector_manager->vector_exprs[i];
- if (local_dem.valid_or_dirty_p () || local_dem.unknown_p ())
- bitmap_clear_bit (m_vector_manager->vector_transp[curr_bb_idx], i);
- else if (expr->has_avl_reg ())
- {
- rtx reg = expr->get_avl_or_vl_reg ();
- for (const insn_info *insn : bb->real_nondebug_insns ())
- {
- if (find_access (insn->defs (), REGNO (reg)))
- {
- bitmap_clear_bit (
- m_vector_manager->vector_transp[curr_bb_idx], i);
- break;
- }
- else if (vlmax_avl_p (expr->get_avl ())
- && find_access (insn->uses (), REGNO (reg)))
- {
- bitmap_clear_bit (
- m_vector_manager->vector_transp[curr_bb_idx], i);
- break;
- }
- }
- }
+ fprintf (dump_file, " Try fuse basic block %d\n", bb->index ());
}
-
- /* Compute anticipatable occurrences. */
- if (local_dem.valid_or_dirty_p ())
- if (anticipatable_occurrence_p (bb, local_dem))
- bitmap_set_bit (m_vector_manager->vector_antic[curr_bb_idx],
- m_vector_manager->get_expr_id (local_dem));
-
- /* Compute available occurrences. */
- if (reaching_out.valid_or_dirty_p ())
+ auto_vec<vsetvl_info> infos;
+ for (insn_info *insn : bb->real_nondebug_insns ())
{
- auto_vec<size_t> available_list
- = m_vector_manager->get_all_available_exprs (reaching_out);
- for (size_t i = 0; i < available_list.length (); i++)
- {
- const vector_insn_info *expr
- = m_vector_manager->vector_exprs[available_list[i]];
- if (available_occurrence_p (bb, *expr))
- bitmap_set_bit (m_vector_manager->vector_comp[curr_bb_idx],
- available_list[i]);
- }
+ vsetvl_info curr_info = vsetvl_info (insn);
+ if (curr_info.valid_p () || curr_info.unknown_p ())
+ infos.safe_push (curr_info);
+
+ /* Collecting GP registers modified by the current bb. */
+ if (insn->is_real ())
+ for (def_info *def : insn->defs ())
+ if (def->is_reg () && GP_REG_P (def->regno ()))
+ bitmap_set_bit (reg_def_loc[bb->index ()], def->regno ());
}
- if (loop_basic_block_p (bb->cfg_bb ()) && local_dem.valid_or_dirty_p ()
- && reaching_out.valid_or_dirty_p ()
- && !local_dem.compatible_p (reaching_out))
- bitmap_clear_bit (m_vector_manager->vector_antic[curr_bb_idx],
- m_vector_manager->get_expr_id (local_dem));
- }
-
- /* Compute kill for each basic block using:
-
- ~(TRANSP | COMP)
- */
-
- FOR_EACH_BB_FN (cfg_bb, cfun)
- {
- bitmap_ior (m_vector_manager->vector_kill[cfg_bb->index],
- m_vector_manager->vector_transp[cfg_bb->index],
- m_vector_manager->vector_comp[cfg_bb->index]);
- bitmap_not (m_vector_manager->vector_kill[cfg_bb->index],
- m_vector_manager->vector_kill[cfg_bb->index]);
- }
-
- FOR_EACH_BB_FN (cfg_bb, cfun)
- {
- edge e;
- edge_iterator ei;
-
- /* If the current block is the destination of an abnormal edge, we
- kill all trapping (for PRE) and memory (for hoist) expressions
- because we won't be able to properly place the instruction on
- the edge. So make them neither anticipatable nor transparent.
- This is fairly conservative.
-
- ??? For hoisting it may be necessary to check for set-and-jump
- instructions here, not just for abnormal edges. The general problem
- is that when an expression cannot not be placed right at the end of
- a basic block we should account for any side-effects of a subsequent
- jump instructions that could clobber the expression. It would
- be best to implement this check along the lines of
- should_hoist_expr_to_dom where the target block is already known
- and, hence, there's no need to conservatively prune expressions on
- "intermediate" set-and-jump instructions. */
- FOR_EACH_EDGE (e, ei, cfg_bb->preds)
- if (e->flags & EDGE_COMPLEX)
- {
- bitmap_clear (m_vector_manager->vector_antic[cfg_bb->index]);
- bitmap_clear (m_vector_manager->vector_transp[cfg_bb->index]);
- }
- }
-}
-
-/* Fuse demand info for earliest edge. */
-bool
-pass_vsetvl::earliest_fusion (void)
-{
- bool changed_p = false;
- for (int ed = 0; ed < NUM_EDGES (m_vector_manager->vector_edge_list); ed++)
- {
- for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
+ vsetvl_info prev_info = vsetvl_info ();
+ prev_info.set_empty ();
+ for (auto &curr_info : infos)
{
- auto &expr = *m_vector_manager->vector_exprs[i];
- if (expr.empty_p ())
- continue;
- edge eg = INDEX_EDGE (m_vector_manager->vector_edge_list, ed);
- /* If it is the edge that we never reach, skip its possible PRE
- fusion conservatively. */
- if (eg->probability == profile_probability::never ())
- break;
- if (eg->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)
- || eg->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
- break;
- if (bitmap_bit_p (m_vector_manager->vector_earliest[ed], i))
+ if (prev_info.empty_p ())
+ prev_info = curr_info;
+ else if ((curr_info.unknown_p () && prev_info.valid_p ())
+ || (curr_info.valid_p () && prev_info.unknown_p ()))
{
- auto &src_block_info = get_block_info (eg->src);
- auto &dest_block_info = get_block_info (eg->dest);
- if (src_block_info.reaching_out.unknown_p ())
- break;
-
- gcc_assert (!(eg->flags & EDGE_ABNORMAL));
- vector_insn_info new_info = vector_insn_info ();
- profile_probability prob = src_block_info.probability;
- /* We don't fuse user vsetvl into EMPTY or
- DIRTY (EMPTY but polluted) block for these
- following reasons:
-
- - The user vsetvl instruction is configured as
- no side effects that the previous passes
- (GSCE, Loop-invariant, ..., etc)
- should be able to do a good job on optimization
- of user explicit vsetvls so we don't need to
- PRE optimization (The user vsetvls should be
- on the optimal local already before this pass)
- again for user vsetvls in VSETVL PASS here
- (Phase 3 && Phase 4).
-
- - Allowing user vsetvls be optimized in PRE
- optimization here (Phase 3 && Phase 4) will
- complicate the codes so much so we prefer user
- vsetvls be optimized in post-optimization
- (Phase 5 && Phase 6). */
- if (vsetvl_insn_p (expr.get_insn ()->rtl ()))
+ block_info.infos.safe_push (prev_info);
+ prev_info = curr_info;
+ }
+ else if (curr_info.valid_p () && prev_info.valid_p ())
+ {
+ if (dem.available_with (prev_info, curr_info))
{
- if (src_block_info.reaching_out.empty_p ())
- continue;
- else if (src_block_info.reaching_out.dirty_p ()
- && !src_block_info.reaching_out.compatible_p (expr))
+ if (dump_file && (dump_flags & TDF_DETAILS))
{
- new_info.set_empty ();
- /* Update probability as uninitialized status so that
- we won't try to fuse any demand info into such EMPTY
- block any more. */
- prob = profile_probability::uninitialized ();
- update_block_info (eg->src->index, prob, new_info);
- continue;
+ fprintf (dump_file,
+ " Ignore curr info since prev info "
+ "available with it:\n");
+ fprintf (dump_file, " prev_info: ");
+ prev_info.dump (dump_file, " ");
+ fprintf (dump_file, " curr_info: ");
+ curr_info.dump (dump_file, " ");
+ fprintf (dump_file, "\n");
}
- }
+ if (!curr_info.use_by_non_rvv_insn_p ()
+ && vsetvl_insn_p (curr_info.get_insn ()->rtl ()))
+ delete_list.safe_push (curr_info);
- if (src_block_info.reaching_out.empty_p ())
- {
- if (src_block_info.probability
- == profile_probability::uninitialized ())
- continue;
- new_info = expr.global_merge (expr, eg->src->index);
- new_info.set_dirty ();
- prob = dest_block_info.probability;
- update_block_info (eg->src->index, prob, new_info);
- changed_p = true;
+ if (curr_info.get_read_vl_insn ())
+ prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ());
}
- else if (src_block_info.reaching_out.dirty_p ())
+ else if (dem.compatible_with (prev_info, curr_info))
{
- /* DIRTY -> DIRTY or VALID -> DIRTY. */
- if (demands_can_be_fused_p (src_block_info.reaching_out,
- expr))
- {
- new_info = src_block_info.reaching_out.global_merge (
- expr, eg->src->index);
- new_info.set_dirty ();
- prob += dest_block_info.probability;
- }
- else if (!src_block_info.reaching_out.compatible_p (expr)
- && !m_vector_manager->earliest_fusion_worthwhile_p (
- eg->src))
+ if (dump_file && (dump_flags & TDF_DETAILS))
{
- new_info.set_empty ();
- prob = profile_probability::uninitialized ();
+ fprintf (dump_file, " Fuse curr info since prev info "
+ "compatible with it:\n");
+ fprintf (dump_file, " prev_info: ");
+ prev_info.dump (dump_file, " ");
+ fprintf (dump_file, " curr_info: ");
+ curr_info.dump (dump_file, " ");
}
- else if (!src_block_info.reaching_out.compatible_p (expr)
- && dest_block_info.probability
- > src_block_info.probability)
+ dem.merge_with (prev_info, curr_info);
+ if (curr_info.get_read_vl_insn ())
+ prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ());
+ if (dump_file && (dump_flags & TDF_DETAILS))
{
- new_info = expr;
- new_info.set_dirty ();
- prob = dest_block_info.probability;
+ fprintf (dump_file, " prev_info after fused: ");
+ prev_info.dump (dump_file, " ");
+ fprintf (dump_file, "\n");
}
- else
- continue;
- update_block_info (eg->src->index, prob, new_info);
- changed_p = true;
}
else
{
- rtx vl = NULL_RTX;
- if (vsetvl_insn_p (
- src_block_info.reaching_out.get_insn ()->rtl ())
- && vsetvl_dominated_by_p (eg->src, expr,
- src_block_info.reaching_out,
- true))
- ;
- else if (!demands_can_be_fused_p (src_block_info.reaching_out,
- expr))
- continue;
- else if (!earliest_pred_can_be_fused_p (
- crtl->ssa->bb (eg->src),
- src_block_info.reaching_out, expr, &vl))
- continue;
-
- vector_insn_info new_info
- = src_block_info.reaching_out.global_merge (expr,
- eg->src->index);
-
- prob = std::max (dest_block_info.probability,
- src_block_info.probability);
- change_vsetvl_insn (new_info.get_insn (), new_info, vl);
- update_block_info (eg->src->index, prob, new_info);
- changed_p = true;
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file,
+ " Cannot fuse uncompatible infos:\n");
+ fprintf (dump_file, " prev_info: ");
+ prev_info.dump (dump_file, " ");
+ fprintf (dump_file, " curr_info: ");
+ curr_info.dump (dump_file, " ");
+ }
+ block_info.infos.safe_push (prev_info);
+ prev_info = curr_info;
}
}
}
+
+ if (prev_info.valid_p () || prev_info.unknown_p ())
+ block_info.infos.safe_push (prev_info);
}
- return changed_p;
-}
-/* Fuse VSETVL demand info according LCM computed location. */
-void
-pass_vsetvl::vsetvl_fusion (void)
-{
- /* Fuse VSETVL demand info until VSETVL CFG fixed. */
- bool changed_p = true;
- int fusion_no = 0;
- while (changed_p)
+ avl_regs = sbitmap_alloc (GP_REG_LAST + 1);
+ bitmap_clear (avl_regs);
+ for (const bb_info *bb : crtl->ssa->bbs ())
{
- changed_p = false;
- fusion_no++;
- prune_expressions ();
- m_vector_manager->create_bitmap_vectors ();
- compute_local_properties ();
- /* Compute global availability. */
- compute_available (m_vector_manager->vector_comp,
- m_vector_manager->vector_kill,
- m_vector_manager->vector_avout,
- m_vector_manager->vector_avin);
- /* Compute global anticipatability. */
- compute_antinout_edge (m_vector_manager->vector_antic,
- m_vector_manager->vector_transp,
- m_vector_manager->vector_antin,
- m_vector_manager->vector_antout);
- /* Compute earliestness. */
- compute_earliest (m_vector_manager->vector_edge_list,
- m_vector_manager->vector_exprs.length (),
- m_vector_manager->vector_antin,
- m_vector_manager->vector_antout,
- m_vector_manager->vector_avout,
- m_vector_manager->vector_kill,
- m_vector_manager->vector_earliest);
- changed_p |= earliest_fusion ();
- if (dump_file && (dump_flags & TDF_DETAILS))
+ vsetvl_block_info &block_info = get_block_info (bb);
+ if (block_info.empty_p ())
+ continue;
+
+ vsetvl_info &header_info = block_info.get_header_info ();
+ if (header_info.valid_p () && header_info.has_reg_avl ())
{
- fprintf (dump_file, "\nEARLIEST fusion %d\n", fusion_no);
- m_vector_manager->dump (dump_file);
+ gcc_assert (GP_REG_P (REGNO (header_info.get_avl ())));
+ bitmap_set_bit (avl_regs, REGNO (header_info.get_avl ()));
}
- m_vector_manager->free_bitmap_vectors ();
- if (!m_vector_manager->vector_exprs.is_empty ())
- m_vector_manager->vector_exprs.release ();
}
}
-/* Return true if VSETVL in the block can be refined as vsetvl zero,zero. */
bool
-pass_vsetvl::can_refine_vsetvl_p (const basic_block cfg_bb,
- const vector_insn_info &info) const
+pre_vsetvl::earliest_fuse_vsetvl_info ()
{
- if (!m_vector_manager->all_same_ratio_p (
- m_vector_manager->vector_avin[cfg_bb->index]))
- return false;
+ compute_avl_def_data ();
+ compute_vsetvl_def_data ();
+ compute_vsetvl_lcm_data ();
- if (!m_vector_manager->all_same_avl_p (
- cfg_bb, m_vector_manager->vector_avin[cfg_bb->index]))
- return false;
+ unsigned num_exprs = exprs.length ();
+ struct edge_list *edges = create_edge_list ();
+ unsigned num_edges = NUM_EDGES (edges);
+ sbitmap *antin
+ = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
+ sbitmap *antout
+ = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
- size_t expr_id
- = bitmap_first_set_bit (m_vector_manager->vector_avin[cfg_bb->index]);
- if (!m_vector_manager->vector_exprs[expr_id]->same_vlmax_p (info))
- return false;
- if (!m_vector_manager->vector_exprs[expr_id]->compatible_avl_p (info))
- return false;
+ sbitmap *earliest = sbitmap_vector_alloc (num_edges, num_exprs);
- edge e;
- edge_iterator ei;
- bool all_valid_p = true;
- FOR_EACH_EDGE (e, ei, cfg_bb->preds)
+ compute_available (avloc, kill, avout, avin);
+ compute_antinout_edge (antloc, transp, antin, antout);
+ compute_earliest (edges, num_exprs, antin, antout, avout, kill, earliest);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
{
- if (bitmap_empty_p (m_vector_manager->vector_avout[e->src->index]))
+ fprintf (dump_file, "\n Compute LCM earliest insert data:\n\n");
+ fprintf (dump_file, " Expression List (%u):\n", num_exprs);
+ for (unsigned i = 0; i < num_exprs; i++)
{
- all_valid_p = false;
- break;
+ const auto &info = *exprs[i];
+ fprintf (dump_file, " Expr[%u]: ", i);
+ info.dump (dump_file, " ");
}
- }
-
- if (!all_valid_p)
- return false;
- return true;
-}
-
-/* Optimize athe case like this:
+ fprintf (dump_file, "\n bitmap data:\n");
+ for (const bb_info *bb : crtl->ssa->bbs ())
+ {
+ unsigned int i = bb->index ();
+ fprintf (dump_file, " BB %u:\n", i);
+ fprintf (dump_file, " avloc: ");
+ dump_bitmap_file (dump_file, avloc[i]);
+ fprintf (dump_file, " kill: ");
+ dump_bitmap_file (dump_file, kill[i]);
+ fprintf (dump_file, " antloc: ");
+ dump_bitmap_file (dump_file, antloc[i]);
+ fprintf (dump_file, " transp: ");
+ dump_bitmap_file (dump_file, transp[i]);
+
+ fprintf (dump_file, " avin: ");
+ dump_bitmap_file (dump_file, avin[i]);
+ fprintf (dump_file, " avout: ");
+ dump_bitmap_file (dump_file, avout[i]);
+ fprintf (dump_file, " antin: ");
+ dump_bitmap_file (dump_file, antin[i]);
+ fprintf (dump_file, " antout: ");
+ dump_bitmap_file (dump_file, antout[i]);
+ }
+ fprintf (dump_file, "\n");
+ fprintf (dump_file, " earliest:\n");
+ for (unsigned ed = 0; ed < num_edges; ed++)
+ {
+ edge eg = INDEX_EDGE (edges, ed);
- bb 0:
- vsetvl 0 a5,zero,e8,mf8
- insn 0 (demand SEW + LMUL)
- bb 1:
- vsetvl 1 a5,zero,e16,mf4
- insn 1 (demand SEW + LMUL)
+ if (bitmap_empty_p (earliest[ed]))
+ continue;
+ fprintf (dump_file, " Edge(bb %u -> bb %u): ", eg->src->index,
+ eg->dest->index);
+ dump_bitmap_file (dump_file, earliest[ed]);
+ }
+ fprintf (dump_file, "\n");
+ }
- In this case, we should be able to refine
- vsetvl 1 into vsetvl zero, zero according AVIN. */
-void
-pass_vsetvl::refine_vsetvls (void) const
-{
- basic_block cfg_bb;
- FOR_EACH_BB_FN (cfg_bb, cfun)
+ if (dump_file && (dump_flags & TDF_DETAILS))
{
- auto info = get_block_info (cfg_bb).local_dem;
- insn_info *insn = info.get_insn ();
- if (!info.valid_p ())
- continue;
+ fprintf (dump_file, " Fused global info result:\n");
+ }
- rtx_insn *rinsn = insn->rtl ();
- if (!can_refine_vsetvl_p (cfg_bb, info))
+ bool changed = false;
+ for (unsigned ed = 0; ed < num_edges; ed++)
+ {
+ sbitmap e = earliest[ed];
+ if (bitmap_empty_p (e))
continue;
- /* We can't refine user vsetvl into vsetvl zero,zero since the dest
- will be used by the following instructions. */
- if (vector_config_insn_p (rinsn))
+ unsigned int expr_index;
+ sbitmap_iterator sbi;
+ EXECUTE_IF_SET_IN_BITMAP (e, 0, expr_index, sbi)
{
- m_vector_manager->to_refine_vsetvls.add (rinsn);
- continue;
- }
+ vsetvl_info &curr_info = *exprs[expr_index];
+ if (!curr_info.valid_p ())
+ continue;
+
+ edge eg = INDEX_EDGE (edges, ed);
+ if (eg->probability == profile_probability::never ())
+ continue;
+ if (eg->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)
+ || eg->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
+ continue;
- /* If all incoming edges to a block have a vector state that is compatbile
- with the block. In such a case we need not emit a vsetvl in the current
- block. */
+ vsetvl_block_info &src_block_info = get_block_info (eg->src);
+ vsetvl_block_info &dest_block_info = get_block_info (eg->dest);
- gcc_assert (has_vtype_op (insn->rtl ()));
- rinsn = PREV_INSN (insn->rtl ());
- gcc_assert (vector_config_insn_p (PREV_INSN (insn->rtl ())));
- if (m_vector_manager->all_avail_in_compatible_p (cfg_bb))
- {
- size_t id = m_vector_manager->get_expr_id (info);
- if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], id))
+ if (src_block_info.probability
+ == profile_probability::uninitialized ())
continue;
- eliminate_insn (rinsn);
- }
- else
- {
- rtx new_pat
- = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, info, NULL_RTX);
- change_insn (rinsn, new_pat);
- }
- }
-}
-void
-pass_vsetvl::cleanup_vsetvls ()
-{
- basic_block cfg_bb;
- FOR_EACH_BB_FN (cfg_bb, cfun)
- {
- auto &info = get_block_info (cfg_bb).reaching_out;
- gcc_assert (m_vector_manager->expr_set_num (
- m_vector_manager->vector_del[cfg_bb->index])
- <= 1);
- for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
- {
- if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], i))
+ if (src_block_info.empty_p ())
{
- if (info.dirty_p ())
- info.set_unknown ();
- else
+ vsetvl_info new_curr_info = curr_info;
+ new_curr_info.set_bb (crtl->ssa->bb (eg->src));
+ bool has_compatible_p = false;
+ unsigned int def_expr_index;
+ sbitmap_iterator sbi2;
+ EXECUTE_IF_SET_IN_BITMAP (
+ vsetvl_def_in[new_curr_info.get_bb ()->index ()], 0,
+ def_expr_index, sbi2)
+ {
+ vsetvl_info &prev_info = *vsetvl_def_exprs[def_expr_index];
+ if (!prev_info.valid_p ())
+ continue;
+ if (dem.compatible_with (prev_info, new_curr_info))
+ {
+ has_compatible_p = true;
+ break;
+ }
+ }
+ if (!has_compatible_p)
{
- const auto dem = get_block_info (cfg_bb).local_dem;
- gcc_assert (dem == *m_vector_manager->vector_exprs[i]);
- insn_info *insn = dem.get_insn ();
- gcc_assert (insn && insn->rtl ());
- rtx_insn *rinsn;
- /* We can't eliminate user vsetvl since the dest will be used
- * by the following instructions. */
- if (vector_config_insn_p (insn->rtl ()))
+ if (dump_file && (dump_flags & TDF_DETAILS))
{
- m_vector_manager->to_delete_vsetvls.add (insn->rtl ());
- continue;
+ fprintf (dump_file,
+ " Forbidden lift up vsetvl info into bb %u "
+ "since there is no vsetvl info that reaching in "
+ "is compatible with it:",
+ eg->src->index);
+ curr_info.dump (dump_file, " ");
}
+ continue;
+ }
- gcc_assert (has_vtype_op (insn->rtl ()));
- rinsn = PREV_INSN (insn->rtl ());
- gcc_assert (vector_config_insn_p (PREV_INSN (insn->rtl ())));
- eliminate_insn (rinsn);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file,
+ " Set empty bb %u to info:", eg->src->index);
+ curr_info.dump (dump_file, " ");
}
+ src_block_info.set_info (curr_info);
+ src_block_info.probability = dest_block_info.probability;
+ changed = true;
}
- }
- }
-}
-
-bool
-pass_vsetvl::commit_vsetvls (void)
-{
- bool need_commit = false;
-
- for (int ed = 0; ed < NUM_EDGES (m_vector_manager->vector_edge_list); ed++)
- {
- for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
- {
- edge eg = INDEX_EDGE (m_vector_manager->vector_edge_list, ed);
- if (bitmap_bit_p (m_vector_manager->vector_insert[ed], i))
+ else if (src_block_info.has_info ())
{
- const vector_insn_info *require
- = m_vector_manager->vector_exprs[i];
- gcc_assert (require->valid_or_dirty_p ());
- rtl_profile_for_edge (eg);
- start_sequence ();
-
- insn_info *insn = require->get_insn ();
- vector_insn_info prev_info = vector_insn_info ();
- sbitmap bitdata = m_vector_manager->vector_avout[eg->src->index];
- if (m_vector_manager->all_same_ratio_p (bitdata)
- && m_vector_manager->all_same_avl_p (eg->dest, bitdata))
+ vsetvl_info &prev_info = src_block_info.get_footer_info ();
+ gcc_assert (prev_info.valid_p ());
+
+ if (dem.compatible_with (prev_info, curr_info))
{
- size_t first = bitmap_first_set_bit (bitdata);
- prev_info = *m_vector_manager->vector_exprs[first];
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, " Fuse curr info since prev info "
+ "compatible with it:\n");
+ fprintf (dump_file, " prev_info: ");
+ prev_info.dump (dump_file, " ");
+ fprintf (dump_file, " curr_info: ");
+ curr_info.dump (dump_file, " ");
+ }
+ dem.merge_with (prev_info, curr_info);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, " prev_info after fused: ");
+ prev_info.dump (dump_file, " ");
+ fprintf (dump_file, "\n");
+ }
+ changed = true;
+ if (src_block_info.has_info ())
+ src_block_info.probability += dest_block_info.probability;
}
+ else if (src_block_info.has_info ()
+ && !dem.compatible_with (prev_info, curr_info))
+ {
+ /* Cancel lift up if probabilities are equal. */
+ if (!earliest_fusion_worthwhile_p (eg->src))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file,
+ " Change empty bb %u to from:",
+ eg->src->index);
+ prev_info.dump (dump_file, " ");
+ fprintf (dump_file,
+ " to (higher probability):");
+ curr_info.dump (dump_file, " ");
+ }
+ src_block_info.set_empty_info ();
+ src_block_info.probability
+ = profile_probability::uninitialized ();
+ changed = true;
+ }
+ /* Choose the one with higher probability. */
+ else if (dest_block_info.probability
+ > src_block_info.probability)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file,
+ " Change empty bb %u to from:",
+ eg->src->index);
+ prev_info.dump (dump_file, " ");
+ fprintf (dump_file,
+ " to (higher probability):");
+ curr_info.dump (dump_file, " ");
+ }
+ src_block_info.set_info (curr_info);
+ src_block_info.probability = dest_block_info.probability;
+ changed = true;
+ }
+ }
+ }
+ else
+ {
+ vsetvl_info &prev_info = src_block_info.get_footer_info ();
+ if (!prev_info.valid_p ()
+ || dem.available_with (prev_info, curr_info))
+ continue;
- insert_vsetvl (EMIT_DIRECT, insn->rtl (), *require, prev_info);
- rtx_insn *rinsn = get_insns ();
- end_sequence ();
- default_rtl_profile ();
-
- /* We should not get an abnormal edge here. */
- gcc_assert (!(eg->flags & EDGE_ABNORMAL));
- need_commit = true;
- insert_insn_on_edge (rinsn, eg);
-
- if (dump_file)
+ if (dem.compatible_with (prev_info, curr_info))
{
- fprintf (dump_file,
- "\nInsert vsetvl insn %d at edge %d from <bb %d> to "
- "<bb %d>:\n",
- INSN_UID (rinsn), ed, eg->src->index,
- eg->dest->index);
- print_rtl_single (dump_file, rinsn);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, " Fuse curr info since prev info "
+ "compatible with it:\n");
+ fprintf (dump_file, " prev_info: ");
+ prev_info.dump (dump_file, " ");
+ fprintf (dump_file, " curr_info: ");
+ curr_info.dump (dump_file, " ");
+ }
+ dem.merge_with (prev_info, curr_info);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, " prev_info after fused: ");
+ prev_info.dump (dump_file, " ");
+ fprintf (dump_file, "\n");
+ }
+ changed = true;
}
}
}
}
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "\n");
+ }
+
+ sbitmap_vector_free (antin);
+ sbitmap_vector_free (antout);
+ sbitmap_vector_free (earliest);
+ free_edge_list (edges);
+
+ return changed;
+}
+
+bool
+pre_vsetvl::compute_vsetvl_def_data ()
+{
+ vsetvl_def_exprs.truncate (0);
+ add_expr (vsetvl_def_exprs, unknow_info);
for (const bb_info *bb : crtl->ssa->bbs ())
{
- basic_block cfg_bb = bb->cfg_bb ();
- const auto reaching_out = get_block_info (cfg_bb).reaching_out;
- if (!reaching_out.dirty_p ())
+ vsetvl_block_info &block_info = get_block_info (bb);
+ if (block_info.empty_p ())
continue;
+ vsetvl_info &footer_info = block_info.get_footer_info ();
+ gcc_assert (footer_info.valid_p () || footer_info.unknown_p ());
+ add_expr (vsetvl_def_exprs, footer_info);
+ }
- rtx new_pat;
- if (!reaching_out.demand_p (DEMAND_AVL))
- {
- vl_vtype_info new_info = reaching_out;
- new_info.set_avl_info (avl_info (const0_rtx, nullptr));
- new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, new_info, NULL_RTX);
- }
- else if (can_refine_vsetvl_p (cfg_bb, reaching_out))
- new_pat
- = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, reaching_out, NULL_RTX);
- else if (vlmax_avl_p (reaching_out.get_avl ()))
- {
- rtx vl = reaching_out.get_avl_or_vl_reg ();
- new_pat = gen_vsetvl_pat (VSETVL_NORMAL, reaching_out, vl);
- }
- else
- new_pat
- = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, reaching_out, NULL_RTX);
+ if (vsetvl_def_in)
+ sbitmap_vector_free (vsetvl_def_in);
+ if (vsetvl_def_out)
+ sbitmap_vector_free (vsetvl_def_out);
- edge eg;
- edge_iterator eg_iterator;
- FOR_EACH_EDGE (eg, eg_iterator, cfg_bb->succs)
- {
- /* We should not get an abnormal edge here. */
- gcc_assert (!(eg->flags & EDGE_ABNORMAL));
- /* We failed to optimize this case in Phase 3 (earliest fusion):
-
- bb 2: vsetvl a5, a3 ...
- goto bb 4
- bb 3: vsetvl a5, a2 ...
- goto bb 4
- bb 4: vsetvli zero, a5 ---> Redundant, should be elided.
-
- Since "a5" value can come from either bb 2 or bb 3, we can't make
- it optimized in Phase 3 which will make phase 3 so complicated.
- Now, we do post optimization here to elide the redundant VSETVL
- insn in bb4. */
- if (m_vector_manager->vsetvl_dominated_by_all_preds_p (cfg_bb,
- reaching_out))
- continue;
+ sbitmap *def_loc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
+ vsetvl_def_exprs.length ());
+ sbitmap *kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
+ vsetvl_def_exprs.length ());
- start_sequence ();
- emit_insn (copy_rtx (new_pat));
- rtx_insn *rinsn = get_insns ();
- end_sequence ();
+ vsetvl_def_in = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
+ vsetvl_def_exprs.length ());
+ vsetvl_def_out = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
+ vsetvl_def_exprs.length ());
- insert_insn_on_edge (rinsn, eg);
- need_commit = true;
- if (dump_file)
+ bitmap_vector_clear (def_loc, last_basic_block_for_fn (cfun));
+ bitmap_vector_clear (kill, last_basic_block_for_fn (cfun));
+ bitmap_vector_clear (vsetvl_def_out, last_basic_block_for_fn (cfun));
+
+ for (const bb_info *bb : crtl->ssa->bbs ())
+ {
+ vsetvl_block_info &block_info = get_block_info (bb);
+ if (block_info.empty_p ())
+ {
+ for (unsigned i = 0; i < vsetvl_def_exprs.length (); i += 1)
{
- fprintf (dump_file,
- "\nInsert vsetvl insn %d from <bb %d> to <bb %d>:\n",
- INSN_UID (rinsn), cfg_bb->index, eg->dest->index);
- print_rtl_single (dump_file, rinsn);
+ const vsetvl_info &info = *vsetvl_def_exprs[i];
+ if (!info.has_reg_avl ())
+ continue;
+ unsigned int regno;
+ sbitmap_iterator sbi;
+ EXECUTE_IF_SET_IN_BITMAP (reg_def_loc[bb->index ()], 0, regno,
+ sbi)
+ if (regno == REGNO (info.get_avl ()))
+ bitmap_set_bit (kill[bb->index ()], i);
}
+ continue;
}
- }
-
- return need_commit;
-}
-
-void
-pass_vsetvl::pre_vsetvl (void)
-{
- /* Compute entity list. */
- prune_expressions ();
-
- m_vector_manager->create_bitmap_vectors ();
- compute_local_properties ();
- m_vector_manager->vector_edge_list = pre_edge_lcm_avs (
- m_vector_manager->vector_exprs.length (), m_vector_manager->vector_transp,
- m_vector_manager->vector_comp, m_vector_manager->vector_antic,
- m_vector_manager->vector_kill, m_vector_manager->vector_avin,
- m_vector_manager->vector_avout, &m_vector_manager->vector_insert,
- &m_vector_manager->vector_del);
-
- /* We should dump the information before CFG is changed. Otherwise it will
- produce ICE (internal compiler error). */
- if (dump_file && (dump_flags & TDF_DETAILS))
- m_vector_manager->dump (dump_file);
- refine_vsetvls ();
- cleanup_vsetvls ();
- bool need_commit = commit_vsetvls ();
- if (need_commit)
- commit_edge_insertions ();
-}
+ vsetvl_info &footer_info = block_info.get_footer_info ();
+ bitmap_ones (kill[bb->index ()]);
+ bitmap_set_bit (def_loc[bb->index ()],
+ get_expr_index (vsetvl_def_exprs, footer_info));
+ }
-/* Some instruction can not be accessed in RTL_SSA when we don't re-init
- the new RTL_SSA framework but it is definetely at the END of the block.
+ /* Set the def_out of the ENTRY basic block to unknow_info expr. */
+ basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);
+ bitmap_set_bit (vsetvl_def_out[entry->index],
+ get_expr_index (vsetvl_def_exprs, unknow_info));
- Here we optimize the VSETVL is hoisted by LCM:
+ compute_reaching_defintion (def_loc, kill, vsetvl_def_in, vsetvl_def_out);
- Before LCM:
- bb 1:
- vsetvli a5,a2,e32,m1,ta,mu
- bb 2:
- vsetvli zero,a5,e32,m1,ta,mu
- ...
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file,
+ "\n Compute vsetvl info reaching defition data:\n\n");
+ fprintf (dump_file, " Expression List (%d):\n",
+ vsetvl_def_exprs.length ());
+ for (unsigned i = 0; i < vsetvl_def_exprs.length (); i++)
+ {
+ const auto &info = *vsetvl_def_exprs[i];
+ fprintf (dump_file, " Expr[%u]: ", i);
+ info.dump (dump_file, " ");
+ }
+ fprintf (dump_file, "\n bitmap data:\n");
+ for (const bb_info *bb : crtl->ssa->bbs ())
+ {
+ unsigned int i = bb->index ();
+ fprintf (dump_file, " BB %u:\n", i);
+ fprintf (dump_file, " def_loc: ");
+ dump_bitmap_file (dump_file, def_loc[i]);
+ fprintf (dump_file, " kill: ");
+ dump_bitmap_file (dump_file, kill[i]);
+ fprintf (dump_file, " vsetvl_def_in: ");
+ dump_bitmap_file (dump_file, vsetvl_def_in[i]);
+ fprintf (dump_file, " vsetvl_def_out: ");
+ dump_bitmap_file (dump_file, vsetvl_def_out[i]);
+ }
+ }
- After LCM:
- bb 1:
- vsetvli a5,a2,e32,m1,ta,mu
- LCM INSERTED: vsetvli zero,a5,e32,m1,ta,mu --> eliminate
- bb 2:
- ...
- */
-rtx_insn *
-pass_vsetvl::get_vsetvl_at_end (const bb_info *bb, vector_insn_info *dem) const
-{
- rtx_insn *end_vsetvl = BB_END (bb->cfg_bb ());
- if (end_vsetvl && NONDEBUG_INSN_P (end_vsetvl))
+ bool changed = false;
+ for (const bb_info *bb : crtl->ssa->bbs ())
{
- if (JUMP_P (end_vsetvl))
- end_vsetvl = PREV_INSN (end_vsetvl);
+ vsetvl_block_info &block_info = get_block_info (bb);
+ if (block_info.empty_p ())
+ continue;
+ vsetvl_info &curr_info = block_info.get_header_info ();
+ if (!curr_info.valid_p ())
+ continue;
- if (NONDEBUG_INSN_P (end_vsetvl)
- && vsetvl_discard_result_insn_p (end_vsetvl))
+ unsigned int expr_index;
+ sbitmap_iterator sbi;
+ bool full_available = true;
+ EXECUTE_IF_SET_IN_BITMAP (vsetvl_def_in[bb->index ()], 0, expr_index, sbi)
{
- /* Only handle single succ. here, multiple succ. is much
- more complicated. */
- if (single_succ_p (bb->cfg_bb ()))
+ vsetvl_info &prev_info = *vsetvl_def_exprs[expr_index];
+ if (!prev_info.valid_p ()
+ || !dem.available_with (prev_info, curr_info))
{
- edge e = single_succ_edge (bb->cfg_bb ());
- *dem = get_block_info (e->dest).local_dem;
- return end_vsetvl;
+ full_available = false;
+ break;
}
}
+ block_info.full_available = full_available;
}
- return nullptr;
+
+ sbitmap_vector_free (def_loc);
+ sbitmap_vector_free (kill);
+ return changed;
}
-/* This predicator should only used within same basic block. */
-static bool
-local_avl_compatible_p (rtx avl1, rtx avl2)
+bool
+pre_vsetvl::preds_has_same_avl_p (const vsetvl_info &curr_info)
{
- if (!REG_P (avl1) || !REG_P (avl2))
+ if (bitmap_empty_p (avin[curr_info.get_bb ()->index ()]))
return false;
- return REGNO (avl1) == REGNO (avl2);
+ unsigned expr_index;
+ sbitmap_iterator sbi;
+ EXECUTE_IF_SET_IN_BITMAP (avin[curr_info.get_bb ()->index ()], 0, expr_index,
+ sbi)
+ {
+ const vsetvl_info &prev_info = *exprs[expr_index];
+ if (!prev_info.valid_p ()
+ || !dem.available_avl_with (prev_info, curr_info))
+ return false;
+ }
+ return true;
}
-/* Local user vsetvl optimizaiton:
-
- Case 1:
- vsetvl a5,a4,e8,mf8
- ...
- vsetvl zero,a5,e8,mf8 --> Eliminate directly.
-
- Case 2:
- vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2
- ...
- vsetvl zero,a5,e32,mf2 --> Eliminate directly. */
void
-pass_vsetvl::local_eliminate_vsetvl_insn (const bb_info *bb) const
+pre_vsetvl::pre_global_vsetvl_info ()
{
- rtx_insn *prev_vsetvl = nullptr;
- rtx_insn *curr_vsetvl = nullptr;
- rtx vl_placeholder = RVV_VLMAX;
- rtx prev_avl = vl_placeholder;
- rtx curr_avl = vl_placeholder;
- vector_insn_info prev_dem;
-
- /* Instruction inserted by LCM is not appeared in RTL-SSA yet, try to
- found those instruciton. */
- if (rtx_insn *end_vsetvl = get_vsetvl_at_end (bb, &prev_dem))
- {
- prev_avl = get_avl (end_vsetvl);
- prev_vsetvl = end_vsetvl;
- }
+ compute_avl_def_data ();
+ compute_vsetvl_def_data ();
+ compute_vsetvl_lcm_data ();
- bool skip_one = false;
- /* Backward propgate vsetvl info, drop the later one (prev_vsetvl) if it's
- compatible with current vsetvl (curr_avl), and merge the vtype and avl
- info. into current vsetvl. */
- for (insn_info *insn : bb->reverse_real_nondebug_insns ())
- {
- rtx_insn *rinsn = insn->rtl ();
- const auto &curr_dem = get_vector_info (insn);
- bool need_invalidate = false;
+ unsigned num_exprs = exprs.length ();
+ edges = pre_edge_lcm_avs (num_exprs, transp, avloc, antloc, kill, avin, avout,
+ &insert, &del);
+ unsigned num_edges = NUM_EDGES (edges);
- /* Skip if this insn already handled in last iteration. */
- if (skip_one)
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "\n Compute LCM insert and delete data:\n\n");
+ fprintf (dump_file, " Expression List (%u):\n", num_exprs);
+ for (unsigned i = 0; i < num_exprs; i++)
{
- skip_one = false;
- continue;
+ const auto &info = *exprs[i];
+ fprintf (dump_file, " Expr[%u]: ", i);
+ info.dump (dump_file, " ");
}
-
- if (vsetvl_insn_p (rinsn))
+ fprintf (dump_file, "\n bitmap data:\n");
+ for (const bb_info *bb : crtl->ssa->bbs ())
{
- curr_vsetvl = rinsn;
- /* vsetvl are using vl rather than avl since it will try to merge
- with other vsetvl_discard_result.
-
- v--- avl
- vsetvl a5,a4,e8,mf8 # vsetvl
- ... ^--- vl
- vsetvl zero,a5,e8,mf8 # vsetvl_discard_result
- ^--- avl
- */
- curr_avl = get_vl (rinsn);
- /* vsetvl is a cut point of local backward vsetvl elimination. */
- need_invalidate = true;
+ unsigned i = bb->index ();
+ fprintf (dump_file, " BB %u:\n", i);
+ fprintf (dump_file, " avloc: ");
+ dump_bitmap_file (dump_file, avloc[i]);
+ fprintf (dump_file, " kill: ");
+ dump_bitmap_file (dump_file, kill[i]);
+ fprintf (dump_file, " antloc: ");
+ dump_bitmap_file (dump_file, antloc[i]);
+ fprintf (dump_file, " transp: ");
+ dump_bitmap_file (dump_file, transp[i]);
+
+ fprintf (dump_file, " avin: ");
+ dump_bitmap_file (dump_file, avin[i]);
+ fprintf (dump_file, " avout: ");
+ dump_bitmap_file (dump_file, avout[i]);
+ fprintf (dump_file, " del: ");
+ dump_bitmap_file (dump_file, del[i]);
}
- else if (has_vtype_op (rinsn) && NONDEBUG_INSN_P (PREV_INSN (rinsn))
- && (vsetvl_discard_result_insn_p (PREV_INSN (rinsn))
- || vsetvl_insn_p (PREV_INSN (rinsn))))
+ fprintf (dump_file, "\n");
+ fprintf (dump_file, " insert:\n");
+ for (unsigned ed = 0; ed < num_edges; ed++)
{
- curr_vsetvl = PREV_INSN (rinsn);
-
- if (vsetvl_insn_p (PREV_INSN (rinsn)))
- {
- /* Need invalidate and skip if it's vsetvl. */
- need_invalidate = true;
- /* vsetvl_discard_result_insn_p won't appeared in RTL-SSA,
- * so only need to skip for vsetvl. */
- skip_one = true;
- }
-
- curr_avl = curr_dem.get_avl ();
+ edge eg = INDEX_EDGE (edges, ed);
- /* Some instrucion like pred_extract_first<mode> don't reqruie avl, so
- the avl is null, use vl_placeholder for unify the handling
- logic. */
- if (!curr_avl)
- curr_avl = vl_placeholder;
- }
- else if (insn->is_call () || insn->is_asm ()
- || find_access (insn->defs (), VL_REGNUM)
- || find_access (insn->defs (), VTYPE_REGNUM)
- || (REG_P (prev_avl)
- && find_access (insn->defs (), REGNO (prev_avl))))
- {
- /* Invalidate if this insn can't propagate vl, vtype or avl. */
- need_invalidate = true;
- prev_dem = vector_insn_info ();
+ if (bitmap_empty_p (insert[ed]))
+ continue;
+ fprintf (dump_file, " Edge(bb %u -> bb %u): ", eg->src->index,
+ eg->dest->index);
+ dump_bitmap_file (dump_file, insert[ed]);
}
- else
- /* Not interested instruction. */
+ }
+
+ /* Remove vsetvl infos as LCM suggests. */
+ for (const bb_info *bb : crtl->ssa->bbs ())
+ {
+ sbitmap d = del[bb->index ()];
+ if (bitmap_count_bits (d) == 0)
continue;
+ gcc_assert (bitmap_count_bits (d) == 1);
+ unsigned expr_index = bitmap_first_set_bit (d);
+ vsetvl_info &info = *exprs[expr_index];
+ gcc_assert (info.valid_p ());
+ gcc_assert (info.get_bb () == bb);
+ const vsetvl_block_info &block_info = get_block_info (info.get_bb ());
+ gcc_assert (block_info.get_header_info () == info);
+ info.set_ignore ();
+ }
- /* Local AVL compatibility checking is simpler than global, we only
- need to check the REGNO is same. */
- if (prev_dem.valid_or_dirty_p ()
- && prev_dem.skip_avl_compatible_p (curr_dem)
- && local_avl_compatible_p (prev_avl, curr_avl))
+ for (const bb_info *bb : crtl->ssa->bbs ())
+ {
+ vsetvl_block_info &block_info = get_block_info (bb);
+ if (block_info.empty_p ())
+ continue;
+ vsetvl_info &curr_info = block_info.get_header_info ();
+ if (curr_info.ignore_p ())
{
- /* curr_dem and prev_dem is compatible! */
- /* Update avl info since we need to make sure they are fully
- compatible before merge. */
- prev_dem.set_avl_info (curr_dem.get_avl_info ());
- /* Merge both and update into curr_vsetvl. */
- prev_dem = curr_dem.local_merge (prev_dem);
- change_vsetvl_insn (curr_dem.get_insn (), prev_dem);
- /* Then we can drop prev_vsetvl. */
- eliminate_insn (prev_vsetvl);
+ if (block_info.infos.is_empty ())
+ continue;
+ curr_info = block_info.infos[0];
}
+ if (curr_info.valid_p () && !curr_info.use_by_non_rvv_insn_p ()
+ && preds_has_same_avl_p (curr_info))
+ curr_info.set_change_vtype_only ();
- if (need_invalidate)
- {
- prev_vsetvl = nullptr;
- curr_vsetvl = nullptr;
- prev_avl = vl_placeholder;
- curr_avl = vl_placeholder;
- prev_dem = vector_insn_info ();
- }
- else
+ vsetvl_info prev_info = vsetvl_info ();
+ prev_info.set_empty ();
+ for (auto &curr_info : block_info.infos)
{
- prev_vsetvl = curr_vsetvl;
- prev_avl = curr_avl;
- prev_dem = curr_dem;
+ if (prev_info.valid_p () && curr_info.valid_p ()
+ && dem.available_avl_with (prev_info, curr_info))
+ curr_info.set_change_vtype_only ();
+ prev_info = curr_info;
}
}
-}
-/* Return the first vsetvl instruction in CFG_BB or NULL if
- none exists or if a user RVV instruction is enountered
- prior to any vsetvl. */
-static rtx_insn *
-get_first_vsetvl_before_rvv_insns (basic_block cfg_bb,
- enum vsetvl_type insn_type)
-{
- gcc_assert (insn_type == VSETVL_DISCARD_RESULT
- || insn_type == VSETVL_VTYPE_CHANGE_ONLY);
- rtx_insn *rinsn;
- FOR_BB_INSNS (cfg_bb, rinsn)
+ /* Cancel unnecessary insertions. */
+ for (int ed = 0; ed < NUM_EDGES (edges); ed++)
{
- if (!NONDEBUG_INSN_P (rinsn))
+ edge eg = INDEX_EDGE (edges, ed);
+ sbitmap i = insert[ed];
+ if (bitmap_count_bits (i) < 1)
continue;
- /* If we don't find any inserted vsetvli before user RVV instructions,
- we don't need to optimize the vsetvls in this block. */
- if (has_vtype_op (rinsn) || vsetvl_insn_p (rinsn))
- return nullptr;
- if (insn_type == VSETVL_DISCARD_RESULT
- && vsetvl_discard_result_insn_p (rinsn))
- return rinsn;
- if (insn_type == VSETVL_VTYPE_CHANGE_ONLY
- && vsetvl_vtype_change_only_p (rinsn))
- return rinsn;
+ if (bitmap_count_bits (i) > 1)
+ /* For code with an infinite loop (e.g. pr61634.c) the data flow is
+ completely wrong, so leave this edge untouched. */
+ continue;
+
+ gcc_assert (bitmap_count_bits (i) == 1);
+ unsigned expr_index = bitmap_first_set_bit (i);
+ const vsetvl_info &info = *exprs[expr_index];
+ gcc_assert (info.valid_p ());
+ if (eg->src->succs->length () == 1)
+ {
+ vsetvl_block_info &block_info = get_block_info (eg->src);
+ if (block_info.empty_p ())
+ continue;
+
+ vsetvl_info &prev_info = block_info.get_footer_info ();
+ if (!block_info.has_info () && !prev_info.ignore_p ()
+ && prev_info.valid_p ())
+ {
+ vsetvl_info curr_info = info;
+ curr_info.set_bb (prev_info.get_bb ());
+ if (dem.compatible_with (prev_info, curr_info))
+ {
+ dem.merge_with (prev_info, curr_info);
+ bitmap_clear_bit (i, expr_index);
+ }
+ }
+ }
}
- return nullptr;
}
-/* Global user vsetvl optimizaiton:
-
- Case 1:
- bb 1:
- vsetvl a5,a4,e8,mf8
- ...
- bb 2:
- ...
- vsetvl zero,a5,e8,mf8 --> Eliminate directly.
-
- Case 2:
- bb 1:
- vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2
- ...
- bb 2:
- ...
- vsetvl zero,a5,e32,mf2 --> Eliminate directly.
-
- Case 3:
- bb 1:
- vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2
- ...
- bb 2:
- ...
- vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2
- goto bb 3
- bb 3:
- ...
- vsetvl zero,a5,e32,mf2 --> Eliminate directly.
-*/
-bool
-pass_vsetvl::global_eliminate_vsetvl_insn (const bb_info *bb) const
+/* Emit the RTL changes decided by the earlier phases:
+ 1. For every vsetvl_info recorded per block: eliminate the insn of an
+ ignored info if that insn is a vsetvl; for a valid info either rewrite
+ the existing vsetvl insn (when it differs from the info) or insert a
+ new vsetvl before the insn.
+ 2. Eliminate every vsetvl insn recorded in delete_list.
+ 3. Queue a vsetvl on each edge whose insert bitmap has exactly one
+ expression set.
+ 4. Queue the footer info of each block for which has_info () is true
+ on all of that block's successor edges.
+ Queued edge insertions are committed once at the end. */
+void
+pre_vsetvl::emit_vsetvl ()
{
- rtx_insn *vsetvl_rinsn = NULL;
- vector_insn_info dem = vector_insn_info ();
- const auto &block_info = get_block_info (bb);
- basic_block cfg_bb = bb->cfg_bb ();
+ /* Set whenever insert_insn_on_edge is called, so that
+ commit_edge_insertions only runs when something was queued. */
+ bool need_commit = false;
- if (block_info.local_dem.valid_or_dirty_p ())
+ for (const bb_info *bb : crtl->ssa->bbs ())
{
- /* Optimize the local vsetvl. */
- dem = block_info.local_dem;
- vsetvl_rinsn
- = get_first_vsetvl_before_rvv_insns (cfg_bb, VSETVL_DISCARD_RESULT);
+ for (const auto &curr_info : get_block_info (bb).infos)
+ {
+ insn_info *insn = curr_info.get_insn ();
+ /* An ignored info needs no vsetvl; drop the insn if it is one. */
+ if (curr_info.ignore_p ())
+ {
+ if (vsetvl_insn_p (insn->rtl ()))
+ eliminate_insn (insn->rtl ());
+ continue;
+ }
+ else if (curr_info.valid_p ())
+ {
+ if (vsetvl_insn_p (insn->rtl ()))
+ {
+ /* Re-parse the existing vsetvl insn and only rewrite it when
+ the final info differs from what it currently encodes. */
+ const vsetvl_info temp = vsetvl_info (insn);
+ if (!(curr_info == temp))
+ {
+ if (dump_file)
+ {
+ fprintf (dump_file, "\n Change vsetvl info from: ");
+ temp.dump (dump_file, " ");
+ fprintf (dump_file, " to: ");
+ curr_info.dump (dump_file, " ");
+ }
+ change_vsetvl_insn (insn, curr_info);
+ }
+ }
+ else
+ {
+ /* The info belongs to a non-vsetvl insn: emit a new vsetvl
+ in front of it. */
+ if (dump_file)
+ {
+ fprintf (dump_file,
+ "\n Insert vsetvl info before insn %d: ",
+ insn->uid ());
+ curr_info.dump (dump_file, " ");
+ }
+ insert_vsetvl (EMIT_BEFORE, insn->rtl (), curr_info);
+ }
+ }
+ }
}
- if (!vsetvl_rinsn)
- /* Optimize the global vsetvl inserted by LCM. */
- vsetvl_rinsn = get_vsetvl_at_end (bb, &dem);
-
- /* No need to optimize if block doesn't have vsetvl instructions. */
- if (!dem.valid_or_dirty_p () || !vsetvl_rinsn || !dem.get_avl_source ()
- || !dem.has_avl_reg ())
- return false;
-
- /* Condition 1: Check it has preds. */
- if (EDGE_COUNT (cfg_bb->preds) == 0)
- return false;
-
- /* If all preds has VL/VTYPE status setted by user vsetvls, and these
- user vsetvls are all skip_avl_compatible_p with the vsetvl in this
- block, we can eliminate this vsetvl instruction. */
- sbitmap avin = m_vector_manager->vector_avin[cfg_bb->index];
- unsigned int bb_index;
- sbitmap_iterator sbi;
- rtx avl = dem.get_avl ();
- hash_set<set_info *> sets
- = get_all_sets (dem.get_avl_source (), true, false, false);
- /* Condition 2: All VL/VTYPE available in are all compatible. */
- EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi)
+ /* Remove the vsetvl insns recorded in delete_list; every entry is
+ asserted to be a vsetvl insn. */
+ for (const vsetvl_info &item : delete_list)
{
- const auto &expr = m_vector_manager->vector_exprs[bb_index];
- const auto &insn = expr->get_insn ();
- def_info *def = find_access (insn->defs (), REGNO (avl));
- set_info *set = safe_dyn_cast<set_info *> (def);
- if (!vsetvl_insn_p (insn->rtl ()) || insn->bb () == bb
- || !sets.contains (set))
- return false;
+ gcc_assert (vsetvl_insn_p (item.get_insn ()->rtl ()));
+ eliminate_insn (item.get_insn ()->rtl ());
}
- /* Condition 3: We don't do the global optimization for the block
- has a pred is entry block or exit block. */
- /* Condition 4: All preds have available VL/VTYPE out. */
- edge e;
- edge_iterator ei;
- FOR_EACH_EDGE (e, ei, cfg_bb->preds)
+ /* Insert vsetvl insns on the edges that LCM suggests. Only edges whose
+ insert bitmap holds exactly one expression are handled. */
+ for (int ed = 0; ed < NUM_EDGES (edges); ed++)
{
- sbitmap avout = m_vector_manager->vector_avout[e->src->index];
- if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)
- || e->src == EXIT_BLOCK_PTR_FOR_FN (cfun)
- || (unsigned int) e->src->index
- >= m_vector_manager->vector_block_infos.length ()
- || bitmap_empty_p (avout))
- return false;
+ edge eg = INDEX_EDGE (edges, ed);
+ sbitmap i = insert[ed];
+ if (bitmap_count_bits (i) < 1)
+ continue;
+
+ if (bitmap_count_bits (i) > 1)
+ /* For code with an infinite loop (e.g. pr61634.c), the data flow is
+ completely wrong, so skip the edge. */
+ continue;
- EXECUTE_IF_SET_IN_BITMAP (avout, 0, bb_index, sbi)
+ gcc_assert (bitmap_count_bits (i) == 1);
+ unsigned expr_index = bitmap_first_set_bit (i);
+ const vsetvl_info &info = *exprs[expr_index];
+ gcc_assert (info.valid_p ());
+ if (dump_file)
{
- const auto &expr = m_vector_manager->vector_exprs[bb_index];
- const auto &insn = expr->get_insn ();
- def_info *def = find_access (insn->defs (), REGNO (avl));
- set_info *set = safe_dyn_cast<set_info *> (def);
- if (!vsetvl_insn_p (insn->rtl ()) || insn->bb () == bb
- || !sets.contains (set) || !expr->skip_avl_compatible_p (dem))
- return false;
+ fprintf (dump_file,
+ "\n Insert vsetvl info at edge(bb %u -> bb %u): ",
+ eg->src->index, eg->dest->index);
+ info.dump (dump_file, " ");
}
+ /* Build the vsetvl in its own insn sequence, then queue that
+ sequence on the edge. */
+ rtl_profile_for_edge (eg);
+ start_sequence ();
+
+ insn_info *insn = info.get_insn ();
+ insert_vsetvl (EMIT_DIRECT, insn->rtl (), info);
+ rtx_insn *rinsn = get_insns ();
+ end_sequence ();
+ default_rtl_profile ();
+
+ /* We should not get an abnormal edge here. */
+ gcc_assert (!(eg->flags & EDGE_ABNORMAL));
+ need_commit = true;
+ insert_insn_on_edge (rinsn, eg);
}
- /* Step1: Reshape the VL/VTYPE status to make sure everything compatible. */
- auto_vec<basic_block> pred_cfg_bbs
- = get_dominated_by (CDI_POST_DOMINATORS, cfg_bb);
- FOR_EACH_EDGE (e, ei, cfg_bb->preds)
+ /* Insert the vsetvl infos that were not deleted after lift up: the
+ footer info of each block for which has_info () is true is emitted on
+ every successor edge of that block. */
+ for (const bb_info *bb : crtl->ssa->bbs ())
{
- sbitmap avout = m_vector_manager->vector_avout[e->src->index];
- EXECUTE_IF_SET_IN_BITMAP (avout, 0, bb_index, sbi)
+ const vsetvl_block_info &block_info = get_block_info (bb);
+ if (!block_info.has_info ())
+ continue;
+
+ const vsetvl_info &footer_info = block_info.get_footer_info ();
+ insn_info *insn = footer_info.get_insn ();
+
+ if (footer_info.ignore_p ())
+ continue;
+
+ edge eg;
+ edge_iterator eg_iterator;
+ FOR_EACH_EDGE (eg, eg_iterator, bb->cfg_bb ()->succs)
{
- vector_insn_info prev_dem = *m_vector_manager->vector_exprs[bb_index];
- vector_insn_info curr_dem = dem;
- insn_info *insn = prev_dem.get_insn ();
- if (!pred_cfg_bbs.contains (insn->bb ()->cfg_bb ()))
- continue;
- /* Update avl info since we need to make sure they are fully
- compatible before merge. */
- curr_dem.set_avl_info (prev_dem.get_avl_info ());
- /* Merge both and update into curr_vsetvl. */
- prev_dem = curr_dem.local_merge (prev_dem);
- change_vsetvl_insn (insn, prev_dem);
+ gcc_assert (!(eg->flags & EDGE_ABNORMAL));
+ if (dump_file)
+ {
+ fprintf (
+ dump_file,
+ "\n Insert missed vsetvl info at edge(bb %u -> bb %u): ",
+ eg->src->index, eg->dest->index);
+ footer_info.dump (dump_file, " ");
+ }
+ /* NOTE(review): unlike the LCM edge loop earlier in this function,
+ rtl_profile_for_edge is not called before start_sequence here,
+ although default_rtl_profile is still called afterwards - confirm
+ whether that is intentional. */
+ start_sequence ();
+ insert_vsetvl (EMIT_DIRECT, insn->rtl (), footer_info);
+ rtx_insn *rinsn = get_insns ();
+ end_sequence ();
+ default_rtl_profile ();
+ insert_insn_on_edge (rinsn, eg);
+ need_commit = true;
}
}
- /* Step2: eliminate the vsetvl instruction. */
- eliminate_insn (vsetvl_rinsn);
- return true;
+ /* Materialize all queued edge insertions in one go. */
+ if (need_commit)
+ commit_edge_insertions ();
}
-/* This function does the following post optimization base on RTL_SSA:
-
- 1. Local user vsetvl optimizations.
- 2. Global user vsetvl optimizations.
- 3. AVL dependencies removal:
- Before VSETVL PASS, RVV instructions pattern is depending on AVL operand
- implicitly. Since we will emit VSETVL instruction and make RVV
- instructions depending on VL/VTYPE global status registers, we remove the
- such AVL operand in the RVV instructions pattern here in order to remove
- AVL dependencies when AVL operand is a register operand.
-
- Before the VSETVL PASS:
- li a5,32
- ...
- vadd.vv (..., a5)
- After the VSETVL PASS:
- li a5,32
- vsetvli zero, a5, ...
- ...
- vadd.vv (..., const_int 0). */
+/* Cleanup step run by lazy_vsetvl as its phase 5: first strip the AVL
+ register operand from RVV insns (remove_avl_operand), then process
+ vsetvl destination operands (remove_unused_dest_operand). */
void
-pass_vsetvl::ssa_post_optimization (void) const
+pre_vsetvl::cleaup ()
{
- for (const bb_info *bb : crtl->ssa->bbs ())
- {
- local_eliminate_vsetvl_insn (bb);
- bool changed_p = true;
- while (changed_p)
- {
- changed_p = false;
- changed_p |= global_eliminate_vsetvl_insn (bb);
- }
- for (insn_info *insn : bb->real_nondebug_insns ())
- {
- rtx_insn *rinsn = insn->rtl ();
- if (vlmax_avl_insn_p (rinsn))
- {
- eliminate_insn (rinsn);
- continue;
- }
+ remove_avl_operand ();
+ remove_unused_dest_operand ();
+}
- /* Erase the AVL operand from the instruction. */
- if (!has_vl_op (rinsn) || !REG_P (get_vl (rinsn)))
- continue;
- rtx avl = get_vl (rinsn);
- if (count_regno_occurrences (rinsn, REGNO (avl)) == 1)
- {
- /* Get the list of uses for the new instruction. */
- auto attempt = crtl->ssa->new_change_attempt ();
- insn_change change (insn);
- /* Remove the use of the substituted value. */
- access_array_builder uses_builder (attempt);
- uses_builder.reserve (insn->num_uses () - 1);
- for (use_info *use : insn->uses ())
- if (use != find_access (insn->uses (), REGNO (avl)))
- uses_builder.quick_push (use);
- use_array new_uses = use_array (uses_builder.finish ());
- change.new_uses = new_uses;
- change.move_range = insn->ebb ()->insn_range ();
- rtx pat;
- if (fault_first_load_p (rinsn))
- pat = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx);
- else
- {
- rtx set = single_set (rinsn);
- rtx src
- = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx);
- pat = gen_rtx_SET (SET_DEST (set), src);
- }
- bool ok = change_insn (crtl->ssa, change, insn, pat);
- gcc_assert (ok);
- }
- }
- }
+/* Walk every real non-debug insn of every RTL-SSA block. For an insn
+ that has a VL operand held in a register, and in which that register
+ occurs exactly once, replace the register by const0_rtx in the insn
+ pattern and drop the matching use from the insn's use list. The change
+ is applied through change_insn and is asserted to succeed. */
+void
+pre_vsetvl::remove_avl_operand ()
+{
+ for (const bb_info *bb : crtl->ssa->bbs ())
+ for (insn_info *insn : bb->real_nondebug_insns ())
+ {
+ rtx_insn *rinsn = insn->rtl ();
+ /* Erase the AVL operand from the instruction. Insns without a VL
+ operand, or whose VL operand is not a register, are left alone. */
+ if (!has_vl_op (rinsn) || !REG_P (get_vl2 (rinsn)))
+ continue;
+ rtx avl = get_vl2 (rinsn);
+ /* Only rewrite when the AVL register appears exactly once in the
+ insn. */
+ if (count_regno_occurrences (rinsn, REGNO (avl)) == 1)
+ {
+ /* Get the list of uses for the new instruction. */
+ auto attempt = crtl->ssa->new_change_attempt ();
+ insn_change change (insn);
+ /* Remove the use of the substituted value: copy every use except
+ the one for the AVL register. */
+ access_array_builder uses_builder (attempt);
+ uses_builder.reserve (insn->num_uses () - 1);
+ for (use_info *use : insn->uses ())
+ if (use != find_access (insn->uses (), REGNO (avl)))
+ uses_builder.quick_push (use);
+ use_array new_uses = use_array (uses_builder.finish ());
+ change.new_uses = new_uses;
+ change.move_range = insn->ebb ()->insn_range ();
+ rtx pat;
+ /* For fault-first loads the replacement is done on the whole
+ pattern; otherwise only the source of the single set is
+ rewritten and the SET is rebuilt with the original
+ destination. */
+ if (fault_first_load_p (rinsn))
+ pat = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx);
+ else
+ {
+ rtx set = single_set (rinsn);
+ rtx src = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx);
+ pat = gen_rtx_SET (SET_DEST (set), src);
+ }
+ bool ok = change_insn (crtl->ssa, change, insn, pat);
+ gcc_assert (ok);
+ }
+ }
}
/* Return true if the SET result is not used by any instructions. */
static bool
has_no_uses (basic_block cfg_bb, rtx_insn *rinsn, int regno)
{
- /* Handle the following case that can not be detected in RTL_SSA. */
- /* E.g.
- li a5, 100
- vsetvli a6, a5...
- ...
- vadd (use a6)
-
- The use of "a6" is removed from "vadd" but the information is
- not updated in RTL_SSA framework. We don't want to re-new
- a new RTL_SSA which is expensive, instead, we use data-flow
- analysis to check whether "a6" has no uses. */
if (bitmap_bit_p (df_get_live_out (cfg_bb), regno))
return false;
@@ -4195,18 +3498,8 @@ has_no_uses (basic_block cfg_bb, rtx_insn *rinsn, int regno)
return true;
}
-/* This function does the following post optimization base on dataflow
- analysis:
-
- 1. Change vsetvl rd, rs1 --> vsevl zero, rs1, if rd is not used by any
- nondebug instructions. Even though this PASS runs after RA and it doesn't
- help for reduce register pressure, it can help instructions scheduling since
- we remove the dependencies.
-
- 2. Remove redundant user vsetvls base on outcome of Phase 4 (LCM) && Phase 5
- (AVL dependencies removal). */
void
-pass_vsetvl::df_post_optimization (void) const
+pre_vsetvl::remove_unused_dest_operand ()
{
df_analyze ();
hash_set<rtx_insn *> to_delete;
@@ -4219,22 +3512,10 @@ pass_vsetvl::df_post_optimization (void) const
if (NONDEBUG_INSN_P (rinsn) && vsetvl_insn_p (rinsn))
{
rtx vl = get_vl (rinsn);
- vector_insn_info info;
- info.parse_insn (rinsn);
- bool to_delete_p = m_vector_manager->to_delete_p (rinsn);
- bool to_refine_p = m_vector_manager->to_refine_p (rinsn);
+ vsetvl_info info = vsetvl_info (rinsn);
if (has_no_uses (cfg_bb, rinsn, REGNO (vl)))
{
- if (to_delete_p)
- to_delete.add (rinsn);
- else if (to_refine_p)
- {
- rtx new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY,
- info, NULL_RTX);
- validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat,
- false);
- }
- else if (!vlmax_avl_p (info.get_avl ()))
+ if (!info.has_vlmax_avl ())
{
rtx new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, info,
NULL_RTX);
@@ -4245,143 +3526,108 @@ pass_vsetvl::df_post_optimization (void) const
}
}
}
- for (rtx_insn *rinsn : to_delete)
- eliminate_insn (rinsn);
}
-void
-pass_vsetvl::init (void)
-{
- if (optimize > 0)
- {
- /* Initialization of RTL_SSA. */
- calculate_dominance_info (CDI_DOMINATORS);
- calculate_dominance_info (CDI_POST_DOMINATORS);
- df_analyze ();
- crtl->ssa = new function_info (cfun);
- }
+/* Static descriptor of the "vsetvl" RTL pass; it is handed to the
+ rtl_opt_pass constructor by pass_vsetvl. No properties or TODO flags
+ are requested and no timevar is assigned (TV_NONE). */
+const pass_data pass_data_vsetvl = {
+ RTL_PASS, /* type */
+ "vsetvl", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
- m_vector_manager = new vector_infos_manager ();
- compute_probabilities ();
+/* The vsetvl insertion pass. execute () dispatches to simple_vsetvl ()
+ when not optimizing and to lazy_vsetvl () otherwise. */
+class pass_vsetvl : public rtl_opt_pass
+{
+private:
+ /* Unoptimized insertion: one vsetvl before each insn that has a vtype
+ operand. */
+ void simple_vsetvl ();
+ /* Optimized insertion driven by a pre_vsetvl object. */
+ void lazy_vsetvl ();
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "\nPrologue: Initialize vector infos\n");
- m_vector_manager->dump (dump_file);
- }
-}
+public:
+ pass_vsetvl (gcc::context *ctxt) : rtl_opt_pass (pass_data_vsetvl, ctxt) {}
-void
-pass_vsetvl::done (void)
-{
- if (optimize > 0)
- {
- /* Finalization of RTL_SSA. */
- free_dominance_info (CDI_DOMINATORS);
- free_dominance_info (CDI_POST_DOMINATORS);
- if (crtl->ssa->perform_pending_updates ())
- cleanup_cfg (0);
- delete crtl->ssa;
- crtl->ssa = nullptr;
- }
- m_vector_manager->release ();
- delete m_vector_manager;
- m_vector_manager = nullptr;
-}
+ /* opt_pass methods: */
+ /* The pass is gated on TARGET_VECTOR. */
+ virtual bool gate (function *) final override { return TARGET_VECTOR; }
+ virtual unsigned int execute (function *) final override;
+}; // class pass_vsetvl
-/* Compute probability for each block. */
+/* Simple vsetvl insertion, used by execute () when not optimizing: walk
+ all insns of all basic blocks and emit a VSETVL_DISCARD_RESULT vsetvl,
+ built from the insn's own vsetvl_info, before every non-debug insn for
+ which has_vtype_op () is true. Nothing is shared between insns. */
void
-pass_vsetvl::compute_probabilities (void)
+pass_vsetvl::simple_vsetvl ()
{
- /* Don't compute it in -O0 since we don't need it. */
- if (!optimize)
- return;
- edge e;
- edge_iterator ei;
+ if (dump_file)
+ fprintf (dump_file, "\nEntering Simple VSETVL PASS\n");
- for (const bb_info *bb : crtl->ssa->bbs ())
+ basic_block cfg_bb;
+ rtx_insn *rinsn;
+ FOR_ALL_BB_FN (cfg_bb, cfun)
{
- basic_block cfg_bb = bb->cfg_bb ();
- auto &curr_prob = get_block_info (cfg_bb).probability;
-
- /* GCC assume entry block (bb 0) are always so
- executed so set its probability as "always". */
- if (ENTRY_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
- curr_prob = profile_probability::always ();
- /* Exit block (bb 1) is the block we don't need to process. */
- if (EXIT_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
- continue;
-
- gcc_assert (curr_prob.initialized_p ());
- FOR_EACH_EDGE (e, ei, cfg_bb->succs)
+ FOR_BB_INSNS (cfg_bb, rinsn)
{
- auto &new_prob = get_block_info (e->dest).probability;
- /* Normally, the edge probability should be initialized.
- However, some special testing code which is written in
- GIMPLE IR style force the edge probility uninitialized,
- we conservatively set it as never so that it will not
- affect PRE (Phase 3 && Phse 4). */
- if (!e->probability.initialized_p ())
- new_prob = profile_probability::never ();
- else if (!new_prob.initialized_p ())
- new_prob = curr_prob * e->probability;
- else if (new_prob == profile_probability::always ())
+ if (!NONDEBUG_INSN_P (rinsn))
continue;
- else
- new_prob += curr_prob * e->probability;
+ /* Every insn with a vtype operand gets its own vsetvl emitted
+ directly in front of it. */
+ if (has_vtype_op (rinsn))
+ {
+ const auto info = vsetvl_info (rinsn);
+ emit_vsetvl_insn (VSETVL_DISCARD_RESULT, EMIT_BEFORE, info,
+ NULL_RTX, rinsn);
+ }
}
}
}
/* Lazy vsetvl insertion for optimize > 0. */
+/* Drives a local pre_vsetvl object through five phases: local fusion,
+ repeated lift up until no further change, global reduction, emission
+ of the resulting vsetvl insns, and cleanup. The state of the object is
+ dumped after phases 1-3 when TDF_DETAILS is set. */
void
-pass_vsetvl::lazy_vsetvl (void)
+pass_vsetvl::lazy_vsetvl ()
{
if (dump_file)
- fprintf (dump_file,
- "\nEntering Lazy VSETVL PASS and Handling %d basic blocks for "
- "function:%s\n",
- n_basic_blocks_for_fn (cfun), function_name (cfun));
+ fprintf (dump_file, "\nEntering Lazy VSETVL PASS\n\n");
+
+ pre_vsetvl pre = pre_vsetvl ();
- /* Phase 1 - Compute the local dems within each block.
- The data-flow analysis within each block is backward analysis. */
if (dump_file)
- fprintf (dump_file, "\nPhase 1: Compute local backward vector infos\n");
- for (const bb_info *bb : crtl->ssa->bbs ())
- compute_local_backward_infos (bb);
+ fprintf (dump_file, "\nPhase 1: Fuse local vsetvl infos.\n\n");
+ pre.fuse_local_vsetvl_info ();
if (dump_file && (dump_flags & TDF_DETAILS))
- m_vector_manager->dump (dump_file);
+ pre.dump (dump_file, "phase 1");
- /* Phase 2 - Emit vsetvl instructions within each basic block according to
- demand, compute and save ANTLOC && AVLOC of each block. */
+ /* Phase 2: Fuse header and footer vsetvl infos between basic blocks.
+ earliest_fuse_vsetvl_info is repeated until it reports no change;
+ fused_count is only used to number the attempts in the dump. */
if (dump_file)
- fprintf (dump_file,
- "\nPhase 2: Emit vsetvl instruction within each block\n");
- for (const bb_info *bb : crtl->ssa->bbs ())
- emit_local_forward_vsetvls (bb);
+ fprintf (dump_file, "\nPhase 2: Lift up vsetvl info.\n\n");
+ bool changed;
+ int fused_count = 0;
+ do
+ {
+ if (dump_file)
+ fprintf (dump_file, " Try lift up %d.\n\n", fused_count);
+ changed = pre.earliest_fuse_vsetvl_info ();
+ fused_count += 1;
+ } while (changed);
+
if (dump_file && (dump_flags & TDF_DETAILS))
- m_vector_manager->dump (dump_file);
+ pre.dump (dump_file, "phase 2");
- /* Phase 3 - Propagate demanded info across blocks. */
+ /* Phase 3: Reducing redundant vsetvl infos using LCM. */
if (dump_file)
- fprintf (dump_file, "\nPhase 3: Demands propagation across blocks\n");
- vsetvl_fusion ();
+ fprintf (dump_file, "\nPhase 3: Reduce global vsetvl infos.\n\n");
+ pre.pre_global_vsetvl_info ();
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ pre.dump (dump_file, "phase 3");
- /* Phase 4 - Lazy code motion. */
+ /* Phase 4: Insert, modify and remove vsetvl insns. */
if (dump_file)
- fprintf (dump_file, "\nPhase 4: PRE vsetvl by Lazy code motion (LCM)\n");
- pre_vsetvl ();
+ fprintf (dump_file,
+ "\nPhase 4: Insert, modify and remove vsetvl insns.\n\n");
+ pre.emit_vsetvl ();
- /* Phase 5 - Post optimization base on RTL_SSA. */
+ /* Phase 5: Cleanup. */
if (dump_file)
- fprintf (dump_file, "\nPhase 5: Post optimization base on RTL_SSA\n");
- ssa_post_optimization ();
+ fprintf (dump_file, "\nPhase 5: Cleaup\n\n");
+ pre.cleaup ();
- /* Phase 6 - Post optimization base on data-flow analysis. */
- if (dump_file)
- fprintf (dump_file,
- "\nPhase 6: Post optimization base on data-flow analysis\n");
- df_post_optimization ();
+ /* NOTE(review): presumably releases the state held by PRE - confirm
+ against the definition of pre_vsetvl::finish. */
+ pre.finish ();
}
/* Main entry point for this pass. */
@@ -4400,14 +3646,11 @@ pass_vsetvl::execute (function *)
if (!has_vector_insn (cfun))
return 0;
- init ();
-
if (!optimize)
simple_vsetvl ();
else
lazy_vsetvl ();
- done ();
return 0;
}
@@ -18,496 +18,156 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#ifndef DEF_INCOMPATIBLE_COND
-#define DEF_INCOMPATIBLE_COND(AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, \
- GE_SEW1, TAIL_POLICTY1, MASK_POLICY1, AVL2, \
- SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, \
- TAIL_POLICTY2, MASK_POLICY2, COND)
+/* DEF_XXX_RULE (prev_demand, next_demand, fused_demand, compatible_p,
+ available_p, fuse)
+ prev_demand: the prev vector insn's sew_lmul_type
+ next_demand: the next vector insn's sew_lmul_type
+ fused_demand: if they are compatible, change prev_info demand to the
+ fused_demand after fusing prev_info and next_info
+ compatible_p: check if prev_demand and next_demand are compatible
+ available_p: check if prev_demand is available for next_demand
+ fuse: if they are compatible, how to modify prev_info */
+
+#ifndef DEF_SEW_LMUL_RULE
+#define DEF_SEW_LMUL_RULE(prev_demand, next_demand, fused_demand, \
+ compatible_p, available_p, fuse)
#endif
-#ifndef DEF_SEW_LMUL_FUSE_RULE
-#define DEF_SEW_LMUL_FUSE_RULE(DEMAND_SEW1, DEMAND_LMUL1, DEMAND_RATIO1, \
- DEMAND_GE_SEW1, DEMAND_SEW2, DEMAND_LMUL2, \
- DEMAND_RATIO2, DEMAND_GE_SEW2, NEW_DEMAND_SEW, \
- NEW_DEMAND_LMUL, NEW_DEMAND_RATIO, \
- NEW_DEMAND_GE_SEW, NEW_SEW, NEW_VLMUL, \
- NEW_RATIO)
+#ifndef DEF_POLICY_RULE
+#define DEF_POLICY_RULE(prev_demand, next_demand, fused_demand, compatible_p, \
+ available_p, fuse)
#endif
-#ifndef DEF_UNAVAILABLE_COND
-#define DEF_UNAVAILABLE_COND(AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, \
- TAIL_POLICTY1, MASK_POLICY1, AVL2, SEW2, LMUL2, \
- RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2, \
- MASK_POLICY2, COND)
+#ifndef DEF_AVL_RULE
+#define DEF_AVL_RULE(prev_demand, next_demand, fused_demand, compatible_p, \
+ available_p, fuse)
#endif
-/* Case 1: Demand compatible AVL. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_TRUE, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_FALSE, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_TRUE, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_FALSE, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ incompatible_avl_p)
-
-/* Case 2: Demand same SEW. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ different_sew_p)
-
-/* Case 3: Demand same LMUL. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ different_lmul_p)
-
-/* Case 4: Demand same RATIO. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ different_ratio_p)
-
-/* Case 5: Demand same TAIL_POLICY. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_TRUE, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_TRUE, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ different_tail_policy_p)
-
-/* Case 6: Demand same MASK_POLICY. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_TRUE,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_TRUE,
- /*COND*/ different_mask_policy_p)
-
-/* Case 7: Demand non zero AVL. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_TRUE, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_TRUE, /*GE_SEW*/ DEMAND_ANY,
- DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_TRUE, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_FALSE, /*GE_SEW*/ DEMAND_ANY,
- DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ possible_zero_avl_p)
-
-/* Case 8: First SEW/LMUL/GE_SEW <-> Second RATIO/SEW. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ second_ratio_invalid_for_first_sew_p)
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ second_ratio_invalid_for_first_lmul_p)
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_TRUE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ second_sew_less_than_first_sew_p)
-
-/* Case 9: First (GE_SEW + LMUL) <-> Second RATIO. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_TRUE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ second_ratio_less_than_first_ratio_p)
-/* Case 11: First (SEW + LMUL) <-> Second RATIO. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ different_ratio_p)
-/* Case 13: First (GE_SEW/SEW + RATIO) <-> Second LMUL. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ different_lmul_p)
-/* Case 14: First (LMUL + RATIO) <-> Second SEW. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ different_sew_p)
-/* Case 15: First (LMUL + RATIO) <-> Second GE_SEW. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_TRUE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ first_sew_less_than_second_sew_p)
-
-/* Case 16: First SEW + Second LMUL <-> First RATIO. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ different_lmul_p)
-/* Case 17: First SEW + Second LMUL <-> Second RATIO. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ different_sew_p)
-
-/* Case 18: First SEW + Second RATIO <-> First LMUL. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ different_ratio_p)
-
-/* Case 19: First GE_SEW + Second LMUL <-> First RATIO. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ second_lmul_less_than_first_lmul_p)
-/* Case 20: First GE_SEW + Second LMUL <-> Second RATIO. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ second_sew_less_than_first_sew_p)
-
-/* Case 21: First GE_SEW + Second RATIO <-> First LMUL. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ second_ratio_less_than_first_ratio_p)
-
-/* Case 22: First GE_SEW + Second SEW + First LMUL + Second ratio. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_TRUE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ different_lmul_p)
-
-/* Case 23: First GE_SEW + Second SEW + Second LMUL + First ratio. */
-DEF_INCOMPATIBLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_TRUE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ different_ratio_p)
-
-/* Merge rules. */
-DEF_SEW_LMUL_FUSE_RULE (/*SEW*/ DEMAND_TRUE, /*LMUL*/ DEMAND_FALSE,
- /*RATIO*/ DEMAND_FALSE, /*GE_SEW*/ DEMAND_TRUE,
- /*SEW*/ DEMAND_TRUE, /*LMUL*/ DEMAND_FALSE,
- /*RATIO*/ DEMAND_FALSE, /*GE_SEW*/ DEMAND_TRUE,
- /*NEW_DEMAND_SEW*/ true,
- /*NEW_DEMAND_LMUL*/ false,
- /*NEW_DEMAND_RATIO*/ false,
- /*NEW_DEMAND_GE_SEW*/ true, greatest_sew, first_vlmul,
- first_ratio)
-
-DEF_SEW_LMUL_FUSE_RULE (/*SEW*/ DEMAND_TRUE, /*LMUL*/ DEMAND_ANY,
- /*RATIO*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*SEW*/ DEMAND_ANY, /*LMUL*/ DEMAND_ANY,
- /*RATIO*/ DEMAND_TRUE, /*GE_SEW*/ DEMAND_ANY,
- /*NEW_DEMAND_SEW*/ true,
- /*NEW_DEMAND_LMUL*/ true,
- /*NEW_DEMAND_RATIO*/ false,
- /*NEW_DEMAND_GE_SEW*/ false, first_sew,
- vlmul_for_first_sew_second_ratio, second_ratio)
-DEF_SEW_LMUL_FUSE_RULE (/*SEW*/ DEMAND_ANY, /*LMUL*/ DEMAND_TRUE,
- /*RATIO*/ DEMAND_FALSE, /*GE_SEW*/ DEMAND_ANY,
- /*SEW*/ DEMAND_TRUE, /*LMUL*/ DEMAND_ANY,
- /*RATIO*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*NEW_DEMAND_SEW*/ true,
- /*NEW_DEMAND_LMUL*/ true,
- /*NEW_DEMAND_RATIO*/ false,
- /*NEW_DEMAND_GE_SEW*/ false, second_sew, first_vlmul,
- ratio_for_second_sew_first_vlmul)
-DEF_SEW_LMUL_FUSE_RULE (/*SEW*/ DEMAND_TRUE, /*LMUL*/ DEMAND_FALSE,
- /*RATIO*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_TRUE,
- /*SEW*/ DEMAND_FALSE, /*LMUL*/ DEMAND_FALSE,
- /*RATIO*/ DEMAND_TRUE, /*GE_SEW*/ DEMAND_FALSE,
- /*NEW_DEMAND_SEW*/ true,
- /*NEW_DEMAND_LMUL*/ false,
- /*NEW_DEMAND_RATIO*/ true,
- /*NEW_DEMAND_GE_SEW*/ true, first_sew,
- vlmul_for_first_sew_second_ratio, second_ratio)
-DEF_SEW_LMUL_FUSE_RULE (/*SEW*/ DEMAND_TRUE, /*LMUL*/ DEMAND_FALSE,
- /*RATIO*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_TRUE,
- /*SEW*/ DEMAND_TRUE, /*LMUL*/ DEMAND_FALSE,
- /*RATIO*/ DEMAND_TRUE, /*GE_SEW*/ DEMAND_TRUE,
- /*NEW_DEMAND_SEW*/ true,
- /*NEW_DEMAND_LMUL*/ false,
- /*NEW_DEMAND_RATIO*/ true,
- /*NEW_DEMAND_GE_SEW*/ true, greatest_sew,
- vlmul_for_greatest_sew_second_ratio, second_ratio)
-DEF_SEW_LMUL_FUSE_RULE (/*SEW*/ DEMAND_TRUE, /*LMUL*/ DEMAND_FALSE,
- /*RATIO*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_TRUE,
- /*SEW*/ DEMAND_FALSE, /*LMUL*/ DEMAND_TRUE,
- /*RATIO*/ DEMAND_FALSE, /*GE_SEW*/ DEMAND_FALSE,
- /*NEW_DEMAND_SEW*/ true,
- /*NEW_DEMAND_LMUL*/ true,
- /*NEW_DEMAND_RATIO*/ false,
- /*NEW_DEMAND_GE_SEW*/ true, first_sew, second_vlmul,
- second_ratio)
-DEF_SEW_LMUL_FUSE_RULE (/*SEW*/ DEMAND_TRUE, /*LMUL*/ DEMAND_FALSE,
- /*RATIO*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_TRUE,
- /*SEW*/ DEMAND_TRUE, /*LMUL*/ DEMAND_TRUE,
- /*RATIO*/ DEMAND_FALSE, /*GE_SEW*/ DEMAND_FALSE,
- /*NEW_DEMAND_SEW*/ true,
- /*NEW_DEMAND_LMUL*/ true,
- /*NEW_DEMAND_RATIO*/ false,
- /*NEW_DEMAND_GE_SEW*/ false, second_sew, second_vlmul,
- second_ratio)
-DEF_SEW_LMUL_FUSE_RULE (/*SEW*/ DEMAND_TRUE, /*LMUL*/ DEMAND_FALSE,
- /*RATIO*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_TRUE,
- /*SEW*/ DEMAND_TRUE, /*LMUL*/ DEMAND_TRUE,
- /*RATIO*/ DEMAND_FALSE, /*GE_SEW*/ DEMAND_TRUE,
- /*NEW_DEMAND_SEW*/ true,
- /*NEW_DEMAND_LMUL*/ true,
- /*NEW_DEMAND_RATIO*/ false,
- /*NEW_DEMAND_GE_SEW*/ false, greatest_sew, second_vlmul,
- second_ratio)
-
-DEF_SEW_LMUL_FUSE_RULE (/*SEW*/ DEMAND_TRUE, /*LMUL*/ DEMAND_FALSE,
- /*RATIO*/ DEMAND_FALSE, /*GE_SEW*/ DEMAND_TRUE,
- /*SEW*/ DEMAND_TRUE, /*LMUL*/ DEMAND_FALSE,
- /*RATIO*/ DEMAND_FALSE, /*GE_SEW*/ DEMAND_FALSE,
- /*NEW_DEMAND_SEW*/ true,
- /*NEW_DEMAND_LMUL*/ false,
- /*NEW_DEMAND_RATIO*/ false,
- /*NEW_DEMAND_GE_SEW*/ false, second_sew, second_vlmul,
- second_ratio)
-DEF_SEW_LMUL_FUSE_RULE (/*SEW*/ DEMAND_TRUE, /*LMUL*/ DEMAND_TRUE,
- /*RATIO*/ DEMAND_FALSE, /*GE_SEW*/ DEMAND_TRUE,
- /*SEW*/ DEMAND_TRUE, /*LMUL*/ DEMAND_FALSE,
- /*RATIO*/ DEMAND_FALSE, /*GE_SEW*/ DEMAND_FALSE,
- /*NEW_DEMAND_SEW*/ true,
- /*NEW_DEMAND_LMUL*/ true,
- /*NEW_DEMAND_RATIO*/ false,
- /*NEW_DEMAND_GE_SEW*/ false, second_sew, first_vlmul,
- second_ratio)
-DEF_SEW_LMUL_FUSE_RULE (/*SEW*/ DEMAND_TRUE, /*LMUL*/ DEMAND_FALSE,
- /*RATIO*/ DEMAND_TRUE, /*GE_SEW*/ DEMAND_TRUE,
- /*SEW*/ DEMAND_TRUE, /*LMUL*/ DEMAND_FALSE,
- /*RATIO*/ DEMAND_FALSE, /*GE_SEW*/ DEMAND_FALSE,
- /*NEW_DEMAND_SEW*/ true,
- /*NEW_DEMAND_LMUL*/ false,
- /*NEW_DEMAND_RATIO*/ true,
- /*NEW_DEMAND_GE_SEW*/ false, second_sew, first_vlmul,
- first_ratio)
-
-/* Define the unavailable cases for LCM. */
-
-/* Case 1: Dem1 (Not demand AVL) is unavailable to Dem2 (Demand AVL). */
-DEF_UNAVAILABLE_COND (/*AVL*/ DEMAND_FALSE, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_TRUE, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ always_unavailable)
-/* Case 2: Dem1 (Demand AVL) is unavailable to Dem2 (Demand normal AVL). */
-DEF_UNAVAILABLE_COND (/*AVL*/ DEMAND_TRUE, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_TRUE, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ avl_unavailable_p)
-
-/* Case 3: Dem1 (Not demand TAIL) is unavailable to Dem2 (Demand TAIL). */
-DEF_UNAVAILABLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_FALSE, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_TRUE, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ always_unavailable)
-
-/* Case 4: Dem1 (Not demand MASK) is unavailable to Dem2 (Demand MASK). */
-DEF_UNAVAILABLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_FALSE,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_TRUE,
- /*COND*/ always_unavailable)
-
-/* Case 5: Dem1 (Demand RATIO) is unavailable to Dem2 (Demand SEW/GE_SEW/LMUL).
- */
-DEF_UNAVAILABLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_FALSE,
- /*LMUL*/ DEMAND_FALSE, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ always_unavailable)
-DEF_UNAVAILABLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_FALSE,
- /*LMUL*/ DEMAND_FALSE, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ always_unavailable)
-
-/* Case 6: Dem1 (Demand SEW). */
-DEF_UNAVAILABLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_FALSE, /*RATIO*/ DEMAND_FALSE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ sew_unavailable_p)
-
-/* Case 7: Dem1 (Demand LMUL). */
-DEF_UNAVAILABLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_FALSE,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_FALSE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_FALSE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ lmul_unavailable_p)
-
-/* Case 8: Dem1 (Demand GE_SEW). */
-DEF_UNAVAILABLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_FALSE, /*RATIO*/ DEMAND_FALSE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_TRUE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ ge_sew_unavailable_p)
-
-/* Case 9: Dem1 (Demand GE_SEW + LMUL). */
-DEF_UNAVAILABLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_TRUE, /*RATIO*/ DEMAND_FALSE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_TRUE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ ge_sew_lmul_unavailable_p)
-
-/* Case 10: Dem1 (Demand GE_SEW + RATIO). */
-DEF_UNAVAILABLE_COND (/*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_TRUE,
- /*LMUL*/ DEMAND_FALSE, /*RATIO*/ DEMAND_TRUE,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_TRUE,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*AVL*/ DEMAND_ANY, /*SEW*/ DEMAND_ANY,
- /*LMUL*/ DEMAND_ANY, /*RATIO*/ DEMAND_ANY,
- /*NONZERO_AVL*/ DEMAND_ANY, /*GE_SEW*/ DEMAND_ANY,
- /*TAIL_POLICTY*/ DEMAND_ANY, /*MASK_POLICY*/ DEMAND_ANY,
- /*COND*/ ge_sew_ratio_unavailable_p)
-
-#undef DEF_INCOMPATIBLE_COND
-#undef DEF_SEW_LMUL_FUSE_RULE
-#undef DEF_UNAVAILABLE_COND
+/* Define SEW and LMUL rules. */
+DEF_SEW_LMUL_RULE (sew_lmul, sew_lmul, sew_lmul, eq_sew_lmul_p, eq_sew_lmul_p,
+ nop)
+DEF_SEW_LMUL_RULE (sew_lmul, ratio_only, sew_lmul, eq_ratio_p, eq_ratio_p, nop)
+DEF_SEW_LMUL_RULE (sew_lmul, sew_only, sew_lmul, eq_sew_p, eq_sew_p, nop)
+DEF_SEW_LMUL_RULE (sew_lmul, ge_sew, sew_lmul,
+ ge_next_sew_and_le_next_max_sew_p,
+ ge_next_sew_and_le_next_max_sew_p, nop)
+DEF_SEW_LMUL_RULE (sew_lmul, ratio_and_ge_sew, sew_lmul,
+ ge_next_sew_and_le_next_max_sew_and_has_next_ratio_p,
+ ge_next_sew_and_le_next_max_sew_and_has_next_ratio_p, nop)
+
+DEF_SEW_LMUL_RULE (ratio_only, sew_lmul, sew_lmul, eq_ratio_p, always_false,
+ use_next_sew_lmul)
+/* Use use_next_sew_lmul here to keep existing testcases unchanged. */
+DEF_SEW_LMUL_RULE (ratio_only, ratio_only, ratio_only, eq_ratio_p, eq_ratio_p,
+ use_next_sew_lmul)
+DEF_SEW_LMUL_RULE (ratio_only, sew_only, sew_lmul, has_prev_ratio_p,
+ always_false, use_next_sew_with_prev_ratio)
+DEF_SEW_LMUL_RULE (ratio_only, ge_sew, ratio_and_ge_sew, has_prev_ratio_p,
+ always_false, use_next_sew_with_prev_ratio)
+DEF_SEW_LMUL_RULE (ratio_only, ratio_and_ge_sew, ratio_and_ge_sew, eq_ratio_p,
+ always_false, use_next_sew_lmul)
+
+DEF_SEW_LMUL_RULE (sew_only, sew_lmul, sew_lmul, eq_sew_p, always_false,
+ use_next_sew_lmul)
+DEF_SEW_LMUL_RULE (sew_only, ratio_only, sew_lmul, has_next_ratio_p,
+ always_false, modify_lmul_with_next_ratio)
+DEF_SEW_LMUL_RULE (sew_only, sew_only, sew_only, eq_sew_p, eq_sew_p, nop)
+DEF_SEW_LMUL_RULE (sew_only, ge_sew, sew_only,
+ ge_next_sew_and_le_next_max_sew_p, ge_next_sew_p, nop)
+DEF_SEW_LMUL_RULE (sew_only, ratio_and_ge_sew, sew_lmul,
+ ge_next_sew_and_le_next_max_sew_and_has_next_ratio_p,
+ always_false, modify_lmul_with_next_ratio)
+
+DEF_SEW_LMUL_RULE (ge_sew, sew_lmul, sew_lmul,
+ ge_prev_sew_and_le_prev_max_sew_p, always_false,
+ use_next_sew_lmul)
+DEF_SEW_LMUL_RULE (ge_sew, ratio_only, ratio_and_ge_sew, has_next_ratio_p,
+ always_false, modify_lmul_with_next_ratio)
+DEF_SEW_LMUL_RULE (ge_sew, sew_only, sew_only,
+ ge_prev_sew_and_le_prev_max_sew_p, always_false,
+ use_next_sew)
+DEF_SEW_LMUL_RULE (ge_sew, ge_sew, ge_sew, max_sew_overlap_p, ge_next_sew_p,
+ use_max_sew)
+DEF_SEW_LMUL_RULE (ge_sew, ratio_and_ge_sew, ratio_and_ge_sew,
+ max_sew_overlap_and_has_next_ratio_p, always_false,
+ use_max_sew_and_lmul_with_next_ratio)
+
+DEF_SEW_LMUL_RULE (ratio_and_ge_sew, sew_lmul, sew_lmul,
+ ge_prev_sew_and_le_prev_max_sew_and_eq_ratio_p, always_false,
+ use_next_sew_lmul)
+DEF_SEW_LMUL_RULE (ratio_and_ge_sew, ratio_only, ratio_and_ge_sew, eq_ratio_p,
+ eq_ratio_p, use_max_sew_and_lmul_with_prev_ratio)
+DEF_SEW_LMUL_RULE (ratio_and_ge_sew, sew_only, sew_only,
+ ge_prev_sew_and_le_prev_max_sew_and_has_prev_ratio_p,
+ always_false, use_next_sew_with_prev_ratio)
+DEF_SEW_LMUL_RULE (ratio_and_ge_sew, ge_sew, ratio_and_ge_sew,
+ max_sew_overlap_and_has_prev_ratio_p, ge_next_sew_p,
+ use_max_sew_and_lmul_with_prev_ratio)
+DEF_SEW_LMUL_RULE (ratio_and_ge_sew, ratio_and_ge_sew, ratio_and_ge_sew,
+ max_sew_overlap_and_eq_ratio_p, ge_next_sew_and_eq_ratio_p,
+ use_max_sew_and_lmul_with_prev_ratio)
+
+/* Define TAIL and MASK compatibility and merge rules. */
+
+DEF_POLICY_RULE (tail_mask_policy, tail_mask_policy, tail_mask_policy,
+ comp_tail_mask_policy_p, eq_tail_mask_policy_p,
+ use_tail_mask_policy)
+DEF_POLICY_RULE (tail_mask_policy, tail_policy_only, tail_mask_policy,
+ comp_tail_policy_p, eq_tail_policy_p, use_tail_policy)
+DEF_POLICY_RULE (tail_mask_policy, mask_policy_only, tail_mask_policy,
+ comp_mask_policy_p, eq_mask_policy_p, use_mask_policy)
+DEF_POLICY_RULE (tail_mask_policy, ignore_policy, tail_mask_policy, always_true,
+ always_true, nop)
+
+DEF_POLICY_RULE (tail_policy_only, tail_mask_policy, tail_mask_policy,
+ comp_tail_policy_p, always_false, use_mask_policy)
+DEF_POLICY_RULE (tail_policy_only, tail_policy_only, tail_policy_only,
+ comp_tail_policy_p, eq_tail_policy_p, use_tail_policy)
+DEF_POLICY_RULE (tail_policy_only, mask_policy_only, tail_mask_policy,
+ always_true, always_false, use_mask_policy)
+DEF_POLICY_RULE (tail_policy_only, ignore_policy, tail_policy_only, always_true,
+ always_true, nop)
+
+DEF_POLICY_RULE (mask_policy_only, tail_mask_policy, tail_mask_policy,
+ comp_mask_policy_p, always_false, use_tail_policy)
+DEF_POLICY_RULE (mask_policy_only, tail_policy_only, tail_mask_policy,
+ always_true, always_false, use_tail_policy)
+DEF_POLICY_RULE (mask_policy_only, mask_policy_only, mask_policy_only,
+ comp_mask_policy_p, eq_mask_policy_p, use_mask_policy)
+DEF_POLICY_RULE (mask_policy_only, ignore_policy, mask_policy_only, always_true,
+ always_true, nop)
+
+DEF_POLICY_RULE (ignore_policy, tail_mask_policy, tail_mask_policy, always_true,
+ always_false, use_tail_mask_policy)
+DEF_POLICY_RULE (ignore_policy, tail_policy_only, tail_policy_only, always_true,
+ always_false, use_tail_policy)
+DEF_POLICY_RULE (ignore_policy, mask_policy_only, mask_policy_only, always_true,
+ always_false, use_mask_policy)
+DEF_POLICY_RULE (ignore_policy, ignore_policy, ignore_policy, always_true,
+ always_true, nop)
+
+/* Define AVL compatibility and merge rules. */
+
+DEF_AVL_RULE (avl, avl, avl, equal_avl_p, equal_avl_p, nop)
+DEF_AVL_RULE (avl, non_zero_avl, avl, equal_avl_or_prev_non_zero_avl_p,
+ equal_avl_or_prev_non_zero_avl_p, nop)
+DEF_AVL_RULE (avl, ignore_avl, avl, always_true, always_true, nop)
+
+DEF_AVL_RULE (non_zero_avl, avl, avl,
+ equal_avl_or_next_non_zero_avl_and_can_use_next_avl_p,
+ always_false, use_next_avl_when_not_equal)
+
+DEF_AVL_RULE (non_zero_avl, non_zero_avl, non_zero_avl, always_true,
+ always_true, nop)
+DEF_AVL_RULE (non_zero_avl, ignore_avl, non_zero_avl, always_true, always_true,
+ nop)
+
+DEF_AVL_RULE (ignore_avl, avl, avl, can_use_next_avl_p, always_false,
+ use_next_avl)
+DEF_AVL_RULE (ignore_avl, non_zero_avl, non_zero_avl, can_use_next_avl_p,
+ always_false, use_next_avl)
+DEF_AVL_RULE (ignore_avl, ignore_avl, ignore_avl, always_true, always_true, nop)
+
+#undef DEF_SEW_LMUL_RULE
+#undef DEF_POLICY_RULE
+#undef DEF_AVL_RULE
@@ -23,466 +23,7 @@ along with GCC; see the file COPYING3. If not see
namespace riscv_vector {
-/* Classification of vsetvl instruction. */
-enum vsetvl_type
-{
- VSETVL_NORMAL,
- VSETVL_VTYPE_CHANGE_ONLY,
- VSETVL_DISCARD_RESULT,
- NUM_VSETVL_TYPE
-};
-enum emit_type
-{
- /* emit_insn directly. */
- EMIT_DIRECT,
- EMIT_BEFORE,
- EMIT_AFTER,
-};
-
-enum demand_type
-{
- DEMAND_AVL,
- DEMAND_SEW,
- DEMAND_LMUL,
- DEMAND_RATIO,
- DEMAND_NONZERO_AVL,
- DEMAND_GE_SEW,
- DEMAND_TAIL_POLICY,
- DEMAND_MASK_POLICY,
- NUM_DEMAND
-};
-
-enum demand_status
-{
- DEMAND_FALSE,
- DEMAND_TRUE,
- DEMAND_ANY,
-};
-
-enum fusion_type
-{
- INVALID_FUSION,
- VALID_AVL_FUSION,
- KILLED_AVL_FUSION
-};
-
-enum def_type
-{
- REAL_SET = 1 << 0,
- PHI_SET = 1 << 1,
- BB_HEAD_SET = 1 << 2,
- BB_END_SET = 1 << 3,
- /* ??? TODO: In RTL_SSA framework, we have REAL_SET,
- PHI_SET, BB_HEAD_SET, BB_END_SET and
- CLOBBER_DEF def_info types. Currently,
- we conservatively do not optimize clobber
- def since we don't see the case that we
- need to optimize it. */
- CLOBBER_DEF = 1 << 4
-};
-
-/* AVL info for RVV instruction. Most RVV instructions have AVL operand in
- implicit dependency. The AVL comparison between 2 RVV instructions is
- very important since it affects our decision whether we should insert
- a vsetvl instruction in this situation. AVL operand of all RVV instructions
- can only be either a const_int value with < 32 or a reg value which can be
- define by either a real RTL instruction or a PHI instruction. So we need a
- standalone method to define AVL comparison and we can not simpily use
- operator "==" to compare 2 RTX value since it's to strict which will make
- use miss a lot of optimization opportunities. This method handle these
- following cases:
-
- - Background:
- Insert-vsetvl PASS is working after RA.
-
- - Terminology:
- - pr: Pseudo-register.
- - hr: Hardware-register.
-
- - Case 1:
-
- Before RA:
- li pr138,13
- insn1 (implicit depend on pr138).
- li pr138,14
- insn2 (implicit depend on pr139).
-
- After RA:
- li hr5,13
- insn1 (implicit depend on hr5).
- li hr5,14
- insn2 (implicit depend on hr5).
-
- Correct IR after vsetvl PASS:
- li hr5,13
- vsetvl1 zero,hr5....
- insn1 (implicit depend on hr5).
- li hr5,14
- vsetvl2 zero,hr5....
- insn2 (implicit depend on hr5).
-
- In this case, both insn1 and insn2 are using hr5 as the same AVL.
- If we use "rtx_equal_p" or "REGNO (AVL1) == REGNO (AVL)", we will end
- up with missing the vsetvl2 instruction which creates wrong result.
-
- Note: Using "==" operator to compare 2 AVL RTX strictly can fix this
- issue. However, it is a too strict comparison method since not all member
- variables in RTX data structure are not neccessary to be the same. It will
- make us miss a lot of optimization opportunities.
-
- - Case 2:
-
- After RA:
- bb 0:
- li hr5,13
- bb 1:
- li hr5,14
- bb2:
- insn1 (implicit depend on hr5).
- insn2 (implicit depend on hr5).
-
- In this case, we may end up with different AVL RTX and produce redundant
- vsetvl instruction.
-
- VALUE is the implicit dependency in each RVV instruction.
- SOURCE is the source definition information of AVL operand. */
-class avl_info
-{
-private:
- rtx m_value;
- rtl_ssa::set_info *m_source;
-
-public:
- avl_info () : m_value (NULL_RTX), m_source (nullptr) {}
- avl_info (const avl_info &);
- avl_info (rtx, rtl_ssa::set_info *);
- rtx get_value () const { return m_value; }
- rtl_ssa::set_info *get_source () const { return m_source; }
- void set_source (rtl_ssa::set_info *set) { m_source = set; }
- bool single_source_equal_p (const avl_info &) const;
- bool multiple_source_equal_p (const avl_info &) const;
- avl_info &operator= (const avl_info &);
- bool operator== (const avl_info &) const;
- bool operator!= (const avl_info &) const;
-
- bool has_avl_imm () const
- {
- return get_value () && CONST_INT_P (get_value ());
- }
- bool has_avl_reg () const { return get_value () && REG_P (get_value ()); }
- bool has_avl_no_reg () const { return !get_value (); }
- bool has_non_zero_avl () const;
- bool has_avl () const { return get_value (); }
-};
-
-/* Basic structure to save VL/VTYPE information. */
-struct vl_vtype_info
-{
-protected:
- /* AVL can be either register or const_int. */
- avl_info m_avl;
- /* Fields from VTYPE. The VTYPE checking depend on the flag
- dem_* before. */
- uint8_t m_sew;
- riscv_vector::vlmul_type m_vlmul;
- uint8_t m_ratio;
- bool m_ta;
- bool m_ma;
-
-public:
- void set_sew (uint8_t sew) { m_sew = sew; }
- void set_vlmul (riscv_vector::vlmul_type vlmul) { m_vlmul = vlmul; }
- void set_ratio (uint8_t ratio) { m_ratio = ratio; }
- void set_ta (bool ta) { m_ta = ta; }
- void set_ma (bool ma) { m_ma = ma; }
-
- vl_vtype_info ()
- : m_avl (avl_info ()), m_sew (0), m_vlmul (riscv_vector::LMUL_RESERVED),
- m_ratio (0), m_ta (0), m_ma (0)
- {}
- vl_vtype_info (const vl_vtype_info &) = default;
- vl_vtype_info &operator= (const vl_vtype_info &) = default;
- vl_vtype_info (avl_info, uint8_t, riscv_vector::vlmul_type, uint8_t, bool,
- bool);
-
- bool operator== (const vl_vtype_info &) const;
- bool operator!= (const vl_vtype_info &) const;
-
- bool has_avl_imm () const { return m_avl.has_avl_imm (); }
- bool has_avl_reg () const { return m_avl.has_avl_reg (); }
- bool has_avl_no_reg () const { return m_avl.has_avl_no_reg (); }
- bool has_non_zero_avl () const { return m_avl.has_non_zero_avl (); };
- bool has_avl () const { return m_avl.has_avl (); }
-
- rtx get_avl () const { return m_avl.get_value (); }
- const avl_info &get_avl_info () const { return m_avl; }
- rtl_ssa::set_info *get_avl_source () const { return m_avl.get_source (); }
- void set_avl_source (rtl_ssa::set_info *set) { m_avl.set_source (set); }
- void set_avl_info (const avl_info &avl) { m_avl = avl; }
- uint8_t get_sew () const { return m_sew; }
- riscv_vector::vlmul_type get_vlmul () const { return m_vlmul; }
- uint8_t get_ratio () const { return m_ratio; }
- bool get_ta () const { return m_ta; }
- bool get_ma () const { return m_ma; }
-
- bool same_avl_p (const vl_vtype_info &) const;
- bool same_vtype_p (const vl_vtype_info &) const;
- bool same_vlmax_p (const vl_vtype_info &) const;
-};
-
-class vector_insn_info : public vl_vtype_info
-{
-private:
- enum state_type
- {
- UNINITIALIZED,
- VALID,
- UNKNOWN,
- EMPTY,
-
- /* The block is polluted as containing VSETVL instruction during dem
- backward propagation to gain better LCM optimization even though
- such VSETVL instruction is not really emit yet during this time. */
- DIRTY,
- };
-
- enum state_type m_state;
-
- bool m_demands[NUM_DEMAND];
-
- /* TODO: Assume INSN1 = INSN holding of definition of AVL.
- INSN2 = INSN that is inserted a vsetvl insn before.
- We may need to add a new member to save INSN of holding AVL.
- m_insn is holding the INSN that is inserted a vsetvl insn before in
- Phase 2. Ideally, most of the time INSN1 == INSN2. However, considering
- such case:
-
- vmv.x.s (INSN2)
- vle8.v (INSN1)
-
- If these 2 instructions are compatible, we should only issue a vsetvl INSN
- (with AVL included) before vmv.x.s, but vmv.x.s is not the INSN holding the
- definition of AVL. */
- rtl_ssa::insn_info *m_insn;
-
- friend class vector_infos_manager;
-
-public:
- vector_insn_info ()
- : vl_vtype_info (), m_state (UNINITIALIZED), m_demands{false},
- m_insn (nullptr)
- {}
-
- /* Parse the instruction to get VL/VTYPE information and demanding
- * information. */
- /* This is only called by simple_vsetvl subroutine when optimize == 0.
- Since RTL_SSA can not be enabled when optimize == 0, we don't initialize
- the m_insn. */
- void parse_insn (rtx_insn *);
- /* This is only called by lazy_vsetvl subroutine when optimize > 0.
- We use RTL_SSA framework to initialize the insn_info. */
- void parse_insn (rtl_ssa::insn_info *);
-
- bool operator>= (const vector_insn_info &) const;
- bool operator== (const vector_insn_info &) const;
-
- bool uninit_p () const { return m_state == UNINITIALIZED; }
- bool valid_p () const { return m_state == VALID; }
- bool unknown_p () const { return m_state == UNKNOWN; }
- bool empty_p () const { return m_state == EMPTY; }
- bool dirty_p () const { return m_state == DIRTY; }
- bool valid_or_dirty_p () const
- {
- return m_state == VALID || m_state == DIRTY;
- }
- bool available_p (const vector_insn_info &) const;
-
- static vector_insn_info get_unknown ()
- {
- vector_insn_info info;
- info.set_unknown ();
- return info;
- }
-
- void set_valid () { m_state = VALID; }
- void set_unknown () { m_state = UNKNOWN; }
- void set_empty () { m_state = EMPTY; }
- void set_dirty () { m_state = DIRTY; }
- void set_insn (rtl_ssa::insn_info *insn) { m_insn = insn; }
-
- bool demand_p (enum demand_type type) const { return m_demands[type]; }
- void demand (enum demand_type type) { m_demands[type] = true; }
- void set_demand (enum demand_type type, bool value)
- {
- m_demands[type] = value;
- }
- void fuse_avl (const vector_insn_info &, const vector_insn_info &);
- void fuse_sew_lmul (const vector_insn_info &, const vector_insn_info &);
- void fuse_tail_policy (const vector_insn_info &, const vector_insn_info &);
- void fuse_mask_policy (const vector_insn_info &, const vector_insn_info &);
-
- bool compatible_p (const vector_insn_info &) const;
- bool skip_avl_compatible_p (const vector_insn_info &) const;
- bool compatible_avl_p (const vl_vtype_info &) const;
- bool compatible_avl_p (const avl_info &) const;
- bool compatible_vtype_p (const vl_vtype_info &) const;
- bool compatible_p (const vl_vtype_info &) const;
- vector_insn_info local_merge (const vector_insn_info &) const;
- vector_insn_info global_merge (const vector_insn_info &, unsigned int) const;
-
- rtl_ssa::insn_info *get_insn () const { return m_insn; }
- const bool *get_demands (void) const { return m_demands; }
- rtx get_avl_or_vl_reg (void) const;
- rtx get_avl_reg_rtx (void) const
- {
- return gen_rtx_REG (Pmode, get_avl_source ()->regno ());
- }
- bool update_fault_first_load_avl (rtl_ssa::insn_info *);
-
- void dump (FILE *) const;
-};
-
-struct vector_block_info
-{
- /* The local_dem vector insn_info of the block. */
- vector_insn_info local_dem;
-
- /* The reaching_out vector insn_info of the block. */
- vector_insn_info reaching_out;
-
- /* The static execute probability of the demand info. */
- profile_probability probability;
-
- vector_block_info () = default;
-};
-
-class vector_infos_manager
-{
-public:
- auto_vec<vector_insn_info> vector_insn_infos;
- auto_vec<vector_block_info> vector_block_infos;
- auto_vec<vector_insn_info *> vector_exprs;
- hash_set<rtx_insn *> to_refine_vsetvls;
- hash_set<rtx_insn *> to_delete_vsetvls;
-
- struct edge_list *vector_edge_list;
- sbitmap *vector_kill;
- sbitmap *vector_del;
- sbitmap *vector_insert;
- sbitmap *vector_antic;
- sbitmap *vector_transp;
- sbitmap *vector_comp;
- sbitmap *vector_avin;
- sbitmap *vector_avout;
- sbitmap *vector_antin;
- sbitmap *vector_antout;
- sbitmap *vector_earliest;
-
- vector_infos_manager ();
-
- /* Create a new expr in expr list if it is not exist. */
- void create_expr (vector_insn_info &);
-
- /* Get the expr id of the pair of expr. */
- size_t get_expr_id (const vector_insn_info &) const;
-
- /* Return the number of expr that is set in the bitmap. */
- size_t expr_set_num (sbitmap) const;
-
- /* Get all relaxer expression id for corresponding vector info. */
- auto_vec<size_t> get_all_available_exprs (const vector_insn_info &) const;
-
- /* Return true if all expression set in bitmap are same AVL. */
- bool all_same_avl_p (const basic_block, sbitmap) const;
-
- /* Return true if all expression set in bitmap are same ratio. */
- bool all_same_ratio_p (sbitmap) const;
-
- bool all_avail_in_compatible_p (const basic_block) const;
- bool earliest_fusion_worthwhile_p (const basic_block) const;
- bool vsetvl_dominated_by_all_preds_p (const basic_block,
- const vector_insn_info &) const;
-
- bool to_delete_p (rtx_insn *rinsn)
- {
- if (to_delete_vsetvls.contains (rinsn))
- {
- to_delete_vsetvls.remove (rinsn);
- if (to_refine_vsetvls.contains (rinsn))
- to_refine_vsetvls.remove (rinsn);
- return true;
- }
- return false;
- }
- bool to_refine_p (rtx_insn *rinsn)
- {
- if (to_refine_vsetvls.contains (rinsn))
- {
- to_refine_vsetvls.remove (rinsn);
- return true;
- }
- return false;
- }
-
- void release (void);
- void create_bitmap_vectors (void);
- void free_bitmap_vectors (void);
-
- void dump (FILE *) const;
-};
-
-struct demands_pair
-{
- demand_status first[NUM_DEMAND];
- demand_status second[NUM_DEMAND];
- bool match_cond_p (const bool *dems1, const bool *dems2) const
- {
- for (unsigned i = 0; i < NUM_DEMAND; i++)
- {
- if (first[i] != DEMAND_ANY && first[i] != dems1[i])
- return false;
- if (second[i] != DEMAND_ANY && second[i] != dems2[i])
- return false;
- }
- return true;
- }
-};
-
-struct demands_cond
-{
- demands_pair pair;
- using CONDITION_TYPE
- = bool (*) (const vector_insn_info &, const vector_insn_info &);
- CONDITION_TYPE incompatible_p;
- bool dual_incompatible_p (const vector_insn_info &info1,
- const vector_insn_info &info2) const
- {
- return ((pair.match_cond_p (info1.get_demands (), info2.get_demands ())
- && incompatible_p (info1, info2))
- || (pair.match_cond_p (info2.get_demands (), info1.get_demands ())
- && incompatible_p (info2, info1)));
- }
-};
-
-struct demands_fuse_rule
-{
- demands_pair pair;
- bool demand_sew_p;
- bool demand_lmul_p;
- bool demand_ratio_p;
- bool demand_ge_sew_p;
-
- using NEW_SEW
- = unsigned (*) (const vector_insn_info &, const vector_insn_info &);
- using NEW_VLMUL
- = vlmul_type (*) (const vector_insn_info &, const vector_insn_info &);
- using NEW_RATIO
- = unsigned (*) (const vector_insn_info &, const vector_insn_info &);
- NEW_SEW new_sew;
- NEW_VLMUL new_vlmul;
- NEW_RATIO new_ratio;
-};
} // namespace riscv_vector
#endif
@@ -46,8 +46,8 @@ int32_t foo3 (int32_t *base, size_t vl)
** vl1re32\.v\tv[0-9]+,0\([a-x0-9]+\)
** vsetvli\tzero,[a-x0-9]+,e32,m1,t[au],m[au]
** vadd.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+
-** vsetvli\tzero,[a-x0-9]+,e32,m2,t[au],m[au]
** vmv.x.s\t[a-x0-9]+,\s*v[0-9]+
+** vsetvli\tzero,[a-x0-9]+,e32,m2,t[au],m[au]
** vmv.v.x\tv[0-9]+,\s*[a-x0-9]+
** vmv.x.s\t[a-x0-9]+,\s*v[0-9]+
** ret
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+void
+foo (int cond, int vl, int *in, int *out, int n)
+{
+ if (cond > 30)
+ {
+ vint32m1_t v = __riscv_vle32_v_i32m1 ((int32_t *) in, vl);
+ __riscv_vse32_v_i32m1 ((int32_t *) out, v, vl);
+ }
+ else if (cond < 10)
+ {
+ vint8mf4_t v = __riscv_vle8_v_i8mf4 ((int8_t *) in, vl);
+ v = __riscv_vle8_v_i8mf4_tu (v, (int8_t *) in + 10, vl);
+ __riscv_vse8_v_i8mf4 ((int8_t *) out, v, vl);
+ }
+ else
+ {
+ vl = vl * 2;
+ }
+
+ for (int i = 0; i < n; i += 1)
+ {
+ vint16mf2_t v = __riscv_vle16_v_i16mf2 ((int16_t *) in + i, vl);
+ v = __riscv_vle16_v_i16mf2_tu (v, (int16_t *) in + i + 10, vl);
+ v = __riscv_vadd_vv_i16mf2 (v, v, vl);
+ __riscv_vse16_v_i16mf2 ((int16_t *) out + i, v, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-not {vsetvli\s+zero,zero,e16,mf2,t[au],m[au]} { target { no-opts "-O0" no-opts "-Os" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
new file mode 100644
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+void
+foo (int i, int n, int m, int32_t *in, int32_t *out)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, i);
+ __riscv_vse32_v_i32m1 (out, v, i);
+ for (; i < n; i += 1)
+ {
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in + i, i);
+ __riscv_vse32_v_i32m1 (out + i, v, i);
+ for (int j = 0; j < m; j += 1)
+ {
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in + i * n + j, j);
+ __riscv_vse32_v_i32m1 (out + i * n + j, v, i);
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 4 { target { { any-opts "-O2" "-O3" } && { no-opts "-g" "-funroll-loops" } } } } } */
@@ -20,7 +20,7 @@ void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond)
{
vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + j + 200), vl);
__riscv_vse32_v_f32mf2 ((float *)(out + i + j + 200), v, vl);
-
+
vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + j + 300), vl);
__riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + j + 300), v2, vl);
}
@@ -28,5 +28,6 @@ void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond)
}
}
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*tu,\s*mu} 1 { target { { any-opts "-O2" "-O3" } && { no-opts "-g" "-funroll-loops" } } } } } */
/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-Os" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { { any-opts "-O1" "-Os" } && { no-opts "-g" "-funroll-loops" } } } } } */
@@ -7,7 +7,7 @@ void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond)
{
vbool64_t mask = *(vbool64_t*) (in + 1000000);
for (size_t j = 0; j < m; j++){
-
+
size_t vl = 101;
for (size_t i = 0; i < n; i++)
{
@@ -20,7 +20,7 @@ void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond)
{
vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + j + 200), vl);
__riscv_vse32_v_f32mf2 ((float *)(out + i + j + 200), v, vl);
-
+
vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + j + 300), vl);
__riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + j + 300), v2, vl);
}
@@ -29,6 +29,6 @@ void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond)
/* { dg-final { scan-assembler {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
/* { dg-final { scan-assembler {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*tu,\s*mu} { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli} 4 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 3 { target { { any-opts "-O1" "-Os" "-O2" } && { no-opts "-g" "-funroll-loops" } } } } } */
/* { dg-final { scan-assembler-times {li\s+[a-x0-9]+,101} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
/* { dg-final { scan-assembler-times {li\s+[a-x0-9]+,102} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
@@ -21,5 +21,6 @@ void f (int8_t * restrict in, int8_t * restrict out, int n, int cond, size_t vl)
}
}
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
@@ -21,7 +21,11 @@ void f2 (void * restrict in, void * restrict out, int l, int n, int m)
}
}
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 4 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli} 4 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 5 { target { { any-opts "-O2"} && { any-opts "-g" "-funroll-loops" } } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 5 { target { { any-opts "-O2"} && { any-opts "-g" "-funroll-loops" } } } } } */
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 4 { target { { any-opts "-O1"} && { any-opts "-g" "-funroll-loops" } } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 4 { target { { any-opts "-O1"} && { any-opts "-g" "-funroll-loops" } } } } } */
+
/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
/* { dg-final { scan-assembler-times {addi\s+[a-x0-9]+,\s*[a-x0-9]+,\s*44} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
@@ -21,6 +21,11 @@ void f2 (void * restrict in, void * restrict out, int l, int n, int m)
}
}
-/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 3 { target { { any-opts "-O2" } && { any-opts "-g" "-funroll-loops" } } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 3 { target { { any-opts "-O2" } && { any-opts "-g" "-funroll-loops" } } } } } */
+
+/* { dg-final { scan-assembler-times {vsetvli} 3 { target { { any-opts "-O1" } && { any-opts "-g" "-funroll-loops" } } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 3 { target { { any-opts "-O1" } && { any-opts "-g" "-funroll-loops" } } } } } */
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
/* { dg-final { scan-assembler-times {addi\s+[a-x0-9]+,\s*[a-x0-9]+,\s*44} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
@@ -6,7 +6,7 @@
void f (int8_t * restrict in, int8_t * restrict out, int l, int n, int m, size_t cond)
{
size_t vl = in[0] + 555;
-
+
if (cond) {
for (int i = 0; i < l; i++){
for (int j = 0; j < m; j++){
@@ -28,7 +28,7 @@ void f (int8_t * restrict in, int8_t * restrict out, int l, int n, int m, size_t
}
}
}
-
+
for (int i = 0; i < l; i++){
for (int j = 0; j < m; j++){
for (int k = 0; k < n; k++)
@@ -50,5 +50,8 @@ void f (int8_t * restrict in, int8_t * restrict out, int l, int n, int m, size_t
}
}
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,mf8,\s*tu,\s*m[au]} 3 { target { { any-opts "-O2"} && {no-opts "-g" "-funroll-loops" } } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 3 { target { { any-opts "-O2"} && {no-opts "-g" "-funroll-loops" } } } } } */
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,mf8,\s*tu,\s*m[au]} 2 { target { { any-opts "-O1" "-Os"} && {no-opts "-g" "-funroll-loops" } } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { { any-opts "-O1" "-Os"} && {no-opts "-g" "-funroll-loops" } } } } } */
@@ -11,11 +11,11 @@ float f (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned con
{
vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + 200), __riscv_vsetvlmax_e32mf2 ());
__riscv_vse32_v_f32mf2 ((float *)(out + i + 200), v, __riscv_vsetvlmax_e32mf2 ());
-
+
vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + 300), __riscv_vsetvlmax_e32mf2 ());
__riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + 300), v2, __riscv_vsetvlmax_e32mf2 ());
}
-
+
vfloat32m1_t v = *(vfloat32m1_t*)(in + 300000);
for (size_t i = 0; i < n; i++)
{
@@ -25,7 +25,7 @@ float f (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned con
return __riscv_vfmv_f_s_f32m1_f32 (v);
}
-/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*3,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*3,\s*e32,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*mf2,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetivli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
@@ -16,6 +16,6 @@ float f (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned con
*(vfloat32m1_t*)(out + 100000) = v;
}
-/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*3,\s*e64,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,\s*3,\s*e64,\s*m1,\s*t[au],\s*m[au]} { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
/* { dg-final { scan-assembler-not {vsetvli} { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
/* { dg-final { scan-assembler-times {vsetivli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
@@ -15,6 +15,6 @@ float f (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned con
*(vfloat32m1_t*)(out + 100000) = v;
}
-/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*3,\s*e64,\s*m4,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,\s*3,\s*e32,\s*m2,\s*t[au],\s*m[au]} { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
/* { dg-final { scan-assembler-not {vsetvli} { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
/* { dg-final { scan-assembler-times {vsetivli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
@@ -16,6 +16,6 @@ float f (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned con
return __riscv_vfmv_f_s_f32m1_f32 (v);
}
-/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*3,\s*e32,\s*m2,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,\s*3,\s*e32,\s*m2,\s*t[au],\s*m[au]} { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
/* { dg-final { scan-assembler-not {vsetvli} { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
/* { dg-final { scan-assembler-times {vsetivli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
@@ -16,17 +16,18 @@ void f (int8_t * restrict in, int8_t * restrict out, int n, int cond)
for (int i = 0 ; i < n * n; i++)
out[i] = out[i] + out[i];
-
+
for (int i = 0 ; i < n * n * n; i++)
out[i] = out[i] * out[i];
for (int i = 0 ; i < n * n * n * n; i++)
out[i] = out[i] * out[i];
-
+
for (int i = 0 ; i < n * n * n * n; i++) {
vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + 900 + i, 5);
__riscv_vse8_v_i8mf8 (out + 900 + i, v, 5);
}
}
-/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*5,\s*e8,\s*mf8,\s*tu,\s*m[au]} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*5,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,\s*5,\s*e8,\s*mf8,\s*tu,\s*m[au]} { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
@@ -23,5 +23,5 @@ void f (int32_t * a, int32_t * b, int n)
}
-/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*(?:e8,mf4|e32,m1),\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
@@ -17,4 +17,4 @@ void f (int32_t *a, int32_t *b, int n)
}
}
-/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
similarity index 100%
rename from gcc/testsuite/gcc.target/riscv/rvv/base/pr111037-1.c
rename to gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-1.c
similarity index 100%
rename from gcc/testsuite/gcc.target/riscv/rvv/base/pr111037-2.c
rename to gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-2.c
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_zve64f_zvfh -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void foo(_Float16 y, int16_t z, int64_t *i64p)
+{
+ vint64m1_t vx =__riscv_vle64_v_i64m1 (i64p, 1);
+ vx = __riscv_vadd_vv_i64m1 (vx, vx, 1);
+ vint16m1_t vz =__riscv_vmv_s_x_i16m1 (z, 1);
+ vfloat16m1_t vy =__riscv_vfmv_s_f_f16m1 (y, 1);
+ asm volatile ("# use %0 %1" : : "vr"(vx), "vr" (vy), "vr" (vz));
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*1,\s*e64,\s*m1,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e16,\s*m1,\s*t[au],\s*m[au]} 1 } } */
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_zve64f_zvfh -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void foo(_Float16 y, int16_t z, int64_t *i64p)
+{
+ vint64m1_t vx =__riscv_vle64_v_i64m1 (i64p, 1);
+ vx = __riscv_vadd_vv_i64m1 (vx, vx, 1);
+ vfloat16m1_t vy =__riscv_vfmv_s_f_f16m1 (y, 1);
+ vint16m1_t vz =__riscv_vmv_s_x_i16m1 (z, 1);
+ asm volatile ("# use %0 %1" : : "vr"(vx), "vr" (vy), "vr" (vz));
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*1,\s*e16,\s*mf4,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e64,\s*m1,\s*t[au],\s*m[au]} 1 } } */
@@ -88,8 +88,8 @@ void f (void * restrict in, void * restrict out, int n, int cond)
}
}
-/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 10 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
-/* { dg-final { scan-assembler-not {vsetvli\s+[a-x0-9]+,\s*zero,\s*e16,\s*mf4,\s*t[au],\s*m[au]} { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
-/* { dg-final { scan-assembler-not {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au]} { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
-/* { dg-final { scan-assembler-not {vsetvli\s+[a-x0-9]+,\s*zero,\s*e64,\s*m1,\s*t[au],\s*m[au]} { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli} 19 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e64,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e16,\s*mf4,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au]} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 10 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
@@ -80,8 +80,8 @@ void f (void * restrict in, void * restrict out, int n, int cond)
}
}
-/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 9 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
-/* { dg-final { scan-assembler-not {vsetvli\s+[a-x0-9]+,\s*zero,\s*e16,\s*mf4,\s*t[au],\s*m[au]} { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
-/* { dg-final { scan-assembler-not {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au]} { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
-/* { dg-final { scan-assembler-not {vsetvli\s+[a-x0-9]+,\s*zero,\s*e64,\s*m1,\s*t[au],\s*m[au]} { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli} 17 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e16,\s*mf4,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au]} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e64,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 9 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
@@ -36,4 +36,3 @@ void f2 (int32_t * restrict in, int32_t * restrict in2, int32_t * restrict out,
}
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli} 4 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
@@ -27,4 +27,4 @@ void f2 (int32_t * restrict in, int32_t * restrict in2, int32_t * restrict out,
}
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli} 4 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+size_t f(int8_t *base, int8_t *out, size_t vl, size_t m, vbool64_t mask) {
+ size_t avl = __riscv_vsetvl_e8mf8(vl);
+
+ for (size_t i = 0; i < m; i++) {
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i, avl);
+ __riscv_vse8_v_i8mf8(out + i, v0, avl);
+ }
+
+ return avl;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
@@ -13,4 +13,4 @@ void f(int8_t *base, int8_t *out, size_t vl, size_t m, vbool64_t mask) {
}
/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
\ No newline at end of file
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
@@ -4,7 +4,7 @@
#include "riscv_vector.h"
void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int cond, int avl) {
-
+
size_t vl;
if (cond)
vl = __riscv_vsetvl_e32m1(avl);
@@ -18,5 +18,5 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
}
}
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*tu,\s*m[au]} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
@@ -16,5 +16,7 @@ void f(int8_t *base, int8_t *out, size_t vl, size_t m, size_t n) {
}
}
-/* { dg-final { scan-assembler {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
/* { dg-final { scan-assembler {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf4,\s*t[au],\s*m[au]} { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au]} { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 3 { target { { any-opts "-O2" "-O3" } && { no-opts "-g" "-funroll-loops" } } } } } */
@@ -33,4 +33,4 @@ void f(int8_t *base, int8_t *out, size_t vl, size_t m, size_t k) {
/* { dg-final { scan-assembler-times {slli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*4} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
/* { dg-final { scan-assembler-times {srli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*8} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli} 6 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 5 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */