@@ -2721,7 +2721,6 @@ public:
}
};
-
void
pre_vsetvl::compute_avl_def_data ()
{
@@ -3241,6 +3240,367 @@ pre_vsetvl::fuse_local_vsetvl_info ()
}
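+/* Fuse vsetvl infos along the LCM "earliest" edges.  Recompute the local
+   and global LCM properties, then, for every expression that is an earliest
+   insertion candidate on an edge, try to move it into the edge's source
+   block: set it as the info of an empty block, merge it into a compatible
+   existing info, or replace an incompatible info when the destination block
+   has the higher probability.  Return true if any block info changed.  */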
+bool
+pre_vsetvl::earliest_fuse_vsetvl_info ()
+{
+ compute_avl_def_data ();
+ compute_vsetvl_def_data ();
+ compute_lcm_local_properties ();
+
+ unsigned num_exprs = m_exprs.length ();
+ struct edge_list *m_edges = create_edge_list ();
+ unsigned num_edges = NUM_EDGES (m_edges);
+ sbitmap *antin
+ = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
+ sbitmap *antout
+ = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
+
+ sbitmap *earliest = sbitmap_vector_alloc (num_edges, num_exprs);
+
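+  /* Compute the global LCM data: availability, anticipatability and the
+     earliest insertion points derived from them.  */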
+ compute_available (m_avloc, m_kill, m_avout, m_avin);
+ compute_antinout_edge (m_antloc, m_transp, antin, antout);
+ compute_earliest (m_edges, num_exprs, antin, antout, m_avout, m_kill,
+ earliest);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "\n Compute LCM earliest insert data:\n\n");
+ fprintf (dump_file, " Expression List (%u):\n", num_exprs);
+ for (unsigned i = 0; i < num_exprs; i++)
+ {
+ const auto &info = *m_exprs[i];
+ fprintf (dump_file, " Expr[%u]: ", i);
+ info.dump (dump_file, " ");
+ }
+ fprintf (dump_file, "\n bitmap data:\n");
+ for (const bb_info *bb : crtl->ssa->bbs ())
+ {
+ unsigned int i = bb->index ();
+ fprintf (dump_file, " BB %u:\n", i);
+ fprintf (dump_file, " avloc: ");
+ dump_bitmap_file (dump_file, m_avloc[i]);
+ fprintf (dump_file, " kill: ");
+ dump_bitmap_file (dump_file, m_kill[i]);
+ fprintf (dump_file, " antloc: ");
+ dump_bitmap_file (dump_file, m_antloc[i]);
+ fprintf (dump_file, " transp: ");
+ dump_bitmap_file (dump_file, m_transp[i]);
+
+ fprintf (dump_file, " avin: ");
+ dump_bitmap_file (dump_file, m_avin[i]);
+ fprintf (dump_file, " avout: ");
+ dump_bitmap_file (dump_file, m_avout[i]);
+ fprintf (dump_file, " antin: ");
+ dump_bitmap_file (dump_file, antin[i]);
+ fprintf (dump_file, " antout: ");
+ dump_bitmap_file (dump_file, antout[i]);
+ }
+ fprintf (dump_file, "\n");
+ fprintf (dump_file, " earliest:\n");
+ for (unsigned ed = 0; ed < num_edges; ed++)
+ {
+ edge eg = INDEX_EDGE (m_edges, ed);
+
+ if (bitmap_empty_p (earliest[ed]))
+ continue;
+ fprintf (dump_file, " Edge(bb %u -> bb %u): ", eg->src->index,
+ eg->dest->index);
+ dump_bitmap_file (dump_file, earliest[ed]);
+ }
+ fprintf (dump_file, "\n");
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, " Fused global info result:\n");
+ }
+
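+  /* Walk every (edge, expression) pair that is marked as an earliest
+     insertion point and try to fuse the expression into the source block
+     of the edge.  */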
+ bool changed = false;
+ for (unsigned ed = 0; ed < num_edges; ed++)
+ {
+ sbitmap e = earliest[ed];
+ if (bitmap_empty_p (e))
+ continue;
+
+ unsigned int expr_index;
+ sbitmap_iterator sbi;
+ EXECUTE_IF_SET_IN_BITMAP (e, 0, expr_index, sbi)
+ {
+ vsetvl_info &curr_info = *m_exprs[expr_index];
+ if (!curr_info.valid_p ())
+ continue;
+
+ edge eg = INDEX_EDGE (m_edges, ed);
+ if (eg->probability == profile_probability::never ())
+ continue;
+ if (eg->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)
+ || eg->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
+ continue;
+
+ vsetvl_block_info &src_block_info = get_block_info (eg->src);
+ vsetvl_block_info &dest_block_info = get_block_info (eg->dest);
+
+ if (src_block_info.probability
+ == profile_probability::uninitialized ())
+ continue;
+
+ if (src_block_info.empty_p ())
+ {
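+	      /* The source block has no vsetvl info yet.  Lift the current
+		 info into it, but only if some vsetvl info reaching the
+		 destination block is compatible with the lifted copy.  */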
+ vsetvl_info new_curr_info = curr_info;
+ new_curr_info.set_bb (crtl->ssa->bb (eg->dest));
+ bool has_compatible_p = false;
+ unsigned int def_expr_index;
+ sbitmap_iterator sbi2;
+ EXECUTE_IF_SET_IN_BITMAP (
+ m_vsetvl_def_in[new_curr_info.get_bb ()->index ()], 0,
+ def_expr_index, sbi2)
+ {
+ vsetvl_info &prev_info = *m_vsetvl_def_exprs[def_expr_index];
+ if (!prev_info.valid_p ())
+ continue;
+ if (m_dem.compatible_p (prev_info, new_curr_info))
+ {
+ has_compatible_p = true;
+ break;
+ }
+ }
+ if (!has_compatible_p)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file,
+			   "    Do not lift vsetvl info into bb %u "
+			   "since no reaching vsetvl info is compatible "
+			   "with it:",
+ eg->src->index);
+ curr_info.dump (dump_file, " ");
+ }
+ continue;
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file,
+ " Set empty bb %u to info:", eg->src->index);
+ curr_info.dump (dump_file, " ");
+ }
+ src_block_info.set_info (curr_info);
+ src_block_info.probability = dest_block_info.probability;
+ changed = true;
+ }
+ else if (src_block_info.has_info ())
+ {
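+	      /* The source block already carries a vsetvl info at its exit.
+		 Merge the current info into it when the two are compatible;
+		 otherwise either clear the block (equal successor
+		 probabilities) or prefer the info of the more probable
+		 destination block.  */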
+ vsetvl_info &prev_info = src_block_info.get_exit_info ();
+ gcc_assert (prev_info.valid_p ());
+
+ if (m_dem.compatible_p (prev_info, curr_info))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, " Fuse curr info since prev info "
+			       "is compatible with it:\n");
+ fprintf (dump_file, " prev_info: ");
+ prev_info.dump (dump_file, " ");
+ fprintf (dump_file, " curr_info: ");
+ curr_info.dump (dump_file, " ");
+ }
+ m_dem.merge (prev_info, curr_info);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, " prev_info after fused: ");
+ prev_info.dump (dump_file, " ");
+ fprintf (dump_file, "\n");
+ }
+ changed = true;
+ if (src_block_info.has_info ())
+ src_block_info.probability += dest_block_info.probability;
+ }
+ else if (src_block_info.has_info ()
+ && !m_dem.compatible_p (prev_info, curr_info))
+ {
+		  /* Cancel the lift-up if the probabilities are equal.  */
+ if (successors_probability_equal_p (eg->src))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file,
+				   "    Reset bb %u to empty, from:",
+ eg->src->index);
+ prev_info.dump (dump_file, " ");
+ fprintf (dump_file,
+				   "    conflicting (equal probability) with:");
+ curr_info.dump (dump_file, " ");
+ }
+ src_block_info.set_empty_info ();
+ src_block_info.probability
+ = profile_probability::uninitialized ();
+ changed = true;
+ }
+ /* Choose the one with higher probability. */
+ else if (dest_block_info.probability
+ > src_block_info.probability)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file,
+				   "    Change bb %u from:",
+ eg->src->index);
+ prev_info.dump (dump_file, " ");
+ fprintf (dump_file,
+ " to (higher probability):");
+ curr_info.dump (dump_file, " ");
+ }
+ src_block_info.set_info (curr_info);
+ src_block_info.probability = dest_block_info.probability;
+ changed = true;
+ }
+ }
+ }
+ else
+ {
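+	      /* Fuse into the exit info of the source block: skip if that
+		 info is invalid or already makes the current info available,
+		 otherwise merge when the two are compatible.  */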
+ vsetvl_info &prev_info = src_block_info.get_exit_info ();
+ if (!prev_info.valid_p ()
+ || m_dem.available_p (prev_info, curr_info))
+ continue;
+
+ if (m_dem.compatible_p (prev_info, curr_info))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, " Fuse curr info since prev info "
+			       "is compatible with it:\n");
+ fprintf (dump_file, " prev_info: ");
+ prev_info.dump (dump_file, " ");
+ fprintf (dump_file, " curr_info: ");
+ curr_info.dump (dump_file, " ");
+ }
+ m_dem.merge (prev_info, curr_info);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, " prev_info after fused: ");
+ prev_info.dump (dump_file, " ");
+ fprintf (dump_file, "\n");
+ }
+ changed = true;
+ }
+ }
+ }
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "\n");
+ }
+
+ sbitmap_vector_free (antin);
+ sbitmap_vector_free (antout);
+ sbitmap_vector_free (earliest);
+ free_edge_list (m_edges);
+
+ return changed;
+}
+
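+/* Run the global LCM step over the collected vsetvl expressions: compute
+   the insertion and deletion sets with pre_edge_lcm_avs, mark the infos
+   that LCM wants removed, and downgrade infos whose AVL is already
+   provided by their predecessors or by the previous info in the block to
+   vtype-only changes.  */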
+void
+pre_vsetvl::pre_global_vsetvl_info ()
+{
+ compute_avl_def_data ();
+ compute_vsetvl_def_data ();
+ compute_lcm_local_properties ();
+
+ unsigned num_exprs = m_exprs.length ();
+ m_edges = pre_edge_lcm_avs (num_exprs, m_transp, m_avloc, m_antloc, m_kill,
+ m_avin, m_avout, &m_insert, &m_del);
+ unsigned num_edges = NUM_EDGES (m_edges);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "\n Compute LCM insert and delete data:\n\n");
+ fprintf (dump_file, " Expression List (%u):\n", num_exprs);
+ for (unsigned i = 0; i < num_exprs; i++)
+ {
+ const auto &info = *m_exprs[i];
+ fprintf (dump_file, " Expr[%u]: ", i);
+ info.dump (dump_file, " ");
+ }
+ fprintf (dump_file, "\n bitmap data:\n");
+ for (const bb_info *bb : crtl->ssa->bbs ())
+ {
+ unsigned i = bb->index ();
+ fprintf (dump_file, " BB %u:\n", i);
+ fprintf (dump_file, " avloc: ");
+ dump_bitmap_file (dump_file, m_avloc[i]);
+ fprintf (dump_file, " kill: ");
+ dump_bitmap_file (dump_file, m_kill[i]);
+ fprintf (dump_file, " antloc: ");
+ dump_bitmap_file (dump_file, m_antloc[i]);
+ fprintf (dump_file, " transp: ");
+ dump_bitmap_file (dump_file, m_transp[i]);
+
+ fprintf (dump_file, " avin: ");
+ dump_bitmap_file (dump_file, m_avin[i]);
+ fprintf (dump_file, " avout: ");
+ dump_bitmap_file (dump_file, m_avout[i]);
+ fprintf (dump_file, " del: ");
+ dump_bitmap_file (dump_file, m_del[i]);
+ }
+ fprintf (dump_file, "\n");
+ fprintf (dump_file, " insert:\n");
+ for (unsigned ed = 0; ed < num_edges; ed++)
+ {
+ edge eg = INDEX_EDGE (m_edges, ed);
+
+ if (bitmap_empty_p (m_insert[ed]))
+ continue;
+ fprintf (dump_file, " Edge(bb %u -> bb %u): ", eg->src->index,
+ eg->dest->index);
+ dump_bitmap_file (dump_file, m_insert[ed]);
+ }
+ }
+
+  /* Remove the vsetvl infos that LCM suggests deleting.  */
+ for (const bb_info *bb : crtl->ssa->bbs ())
+ {
+ sbitmap d = m_del[bb->index ()];
+ if (bitmap_count_bits (d) == 0)
+ continue;
+ gcc_assert (bitmap_count_bits (d) == 1);
+ unsigned expr_index = bitmap_first_set_bit (d);
+ vsetvl_info &info = *m_exprs[expr_index];
+ gcc_assert (info.valid_p ());
+ gcc_assert (info.get_bb () == bb);
+ const vsetvl_block_info &block_info = get_block_info (info.get_bb ());
+ gcc_assert (block_info.get_entry_info () == info);
+ info.set_delete ();
+ }
+
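+  /* Downgrade vsetvl infos that do not need to set the AVL: when every
+     predecessor provides the same AVL (and the VL result is not used by a
+     non-RVV insn), or when the previous info in the block already makes
+     the AVL available, only the vtype needs to change.  */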
+ for (const bb_info *bb : crtl->ssa->bbs ())
+ {
+ vsetvl_block_info &block_info = get_block_info (bb);
+ if (block_info.empty_p ())
+ continue;
+ vsetvl_info &curr_info = block_info.get_entry_info ();
+ if (curr_info.delete_p ())
+ {
+ if (block_info.infos.is_empty ())
+ continue;
+ curr_info = block_info.infos[0];
+ }
+ if (curr_info.valid_p () && !curr_info.vl_use_by_non_rvv_insn_p ()
+ && preds_has_same_avl_p (curr_info))
+ curr_info.set_change_vtype_only ();
+
+ vsetvl_info prev_info = vsetvl_info ();
+ prev_info.set_empty ();
+ for (auto &curr_info : block_info.infos)
+ {
+ if (prev_info.valid_p () && curr_info.valid_p ()
+ && m_dem.avl_available_p (prev_info, curr_info))
+ curr_info.set_change_vtype_only ();
+ prev_info = curr_info;
+ }
+ }
+}
+
const pass_data pass_data_vsetvl = {
RTL_PASS, /* type */
"vsetvl", /* name */
@@ -3379,648 +3739,6 @@ make_pass_vsetvl (gcc::context *ctxt)
return new pass_vsetvl (ctxt);
}
-/* Assemble the candidates expressions for LCM. */
-void
-pass_vsetvl::prune_expressions (void)
-{
- for (const bb_info *bb : crtl->ssa->bbs ())
- {
- if (m_vector_manager->vector_block_infos[bb->index ()]
- .local_dem.valid_or_dirty_p ())
- m_vector_manager->create_expr (
- m_vector_manager->vector_block_infos[bb->index ()].local_dem);
- if (m_vector_manager->vector_block_infos[bb->index ()]
- .reaching_out.valid_or_dirty_p ())
- m_vector_manager->create_expr (
- m_vector_manager->vector_block_infos[bb->index ()].reaching_out);
- }
-
- if (dump_file)
- {
- fprintf (dump_file, "\nThe total VSETVL expression num = %d\n",
- m_vector_manager->vector_exprs.length ());
- fprintf (dump_file, "Expression List:\n");
- for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
- {
- fprintf (dump_file, "Expr[%ld]:\n", i);
- m_vector_manager->vector_exprs[i]->dump (dump_file);
- fprintf (dump_file, "\n");
- }
- }
-}
-
-/* Compute the local properties of each recorded expression.
-
- Local properties are those that are defined by the block, irrespective of
- other blocks.
-
- An expression is transparent in a block if its operands are not modified
- in the block.
-
- An expression is computed (locally available) in a block if it is computed
- at least once and expression would contain the same value if the
- computation was moved to the end of the block.
-
- An expression is locally anticipatable in a block if it is computed at
- least once and expression would contain the same value if the computation
- was moved to the beginning of the block. */
-void
-pass_vsetvl::compute_local_properties (void)
-{
- /* - If T is locally available at the end of a block, then T' must be
- available at the end of the same block. Since some optimization has
- occurred earlier, T' might not be locally available, however, it must
- have been previously computed on all paths. As a formula, T at AVLOC(B)
- implies that T' at AVOUT(B).
- An "available occurrence" is one that is the last occurrence in the
- basic block and the operands are not modified by following statements in
- the basic block [including this insn].
-
- - If T is locally anticipated at the beginning of a block, then either
- T', is locally anticipated or it is already available from previous
- blocks. As a formula, this means that T at ANTLOC(B) implies that T' at
- ANTLOC(B) at AVIN(B).
- An "anticipatable occurrence" is one that is the first occurrence in the
- basic block, the operands are not modified in the basic block prior
- to the occurrence and the output is not used between the start of
- the block and the occurrence. */
-
- basic_block cfg_bb;
- for (const bb_info *bb : crtl->ssa->bbs ())
- {
- unsigned int curr_bb_idx = bb->index ();
- if (curr_bb_idx == ENTRY_BLOCK || curr_bb_idx == EXIT_BLOCK)
- continue;
- const auto local_dem
- = m_vector_manager->vector_block_infos[curr_bb_idx].local_dem;
- const auto reaching_out
- = m_vector_manager->vector_block_infos[curr_bb_idx].reaching_out;
-
- /* Compute transparent. */
- for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
- {
- const auto *expr = m_vector_manager->vector_exprs[i];
- if (local_dem.valid_or_dirty_p () || local_dem.unknown_p ())
- bitmap_clear_bit (m_vector_manager->vector_transp[curr_bb_idx], i);
- else if (expr->has_avl_reg ())
- {
- rtx reg = expr->get_avl_or_vl_reg ();
- for (const insn_info *insn : bb->real_nondebug_insns ())
- {
- if (find_access (insn->defs (), REGNO (reg)))
- {
- bitmap_clear_bit (
- m_vector_manager->vector_transp[curr_bb_idx], i);
- break;
- }
- else if (vlmax_avl_p (expr->get_avl ())
- && find_access (insn->uses (), REGNO (reg)))
- {
- bitmap_clear_bit (
- m_vector_manager->vector_transp[curr_bb_idx], i);
- break;
- }
- }
- }
- }
-
- /* Compute anticipatable occurrences. */
- if (local_dem.valid_or_dirty_p ())
- if (anticipatable_occurrence_p (bb, local_dem))
- bitmap_set_bit (m_vector_manager->vector_antic[curr_bb_idx],
- m_vector_manager->get_expr_id (local_dem));
-
- /* Compute available occurrences. */
- if (reaching_out.valid_or_dirty_p ())
- {
- auto_vec<size_t> available_list
- = m_vector_manager->get_all_available_exprs (reaching_out);
- for (size_t i = 0; i < available_list.length (); i++)
- {
- const vector_insn_info *expr
- = m_vector_manager->vector_exprs[available_list[i]];
- if (available_occurrence_p (bb, *expr))
- bitmap_set_bit (m_vector_manager->vector_comp[curr_bb_idx],
- available_list[i]);
- }
- }
-
- if (loop_basic_block_p (bb->cfg_bb ()) && local_dem.valid_or_dirty_p ()
- && reaching_out.valid_or_dirty_p ()
- && !local_dem.compatible_p (reaching_out))
- bitmap_clear_bit (m_vector_manager->vector_antic[curr_bb_idx],
- m_vector_manager->get_expr_id (local_dem));
- }
-
- /* Compute kill for each basic block using:
-
- ~(TRANSP | COMP)
- */
-
- FOR_EACH_BB_FN (cfg_bb, cfun)
- {
- bitmap_ior (m_vector_manager->vector_kill[cfg_bb->index],
- m_vector_manager->vector_transp[cfg_bb->index],
- m_vector_manager->vector_comp[cfg_bb->index]);
- bitmap_not (m_vector_manager->vector_kill[cfg_bb->index],
- m_vector_manager->vector_kill[cfg_bb->index]);
- }
-
- FOR_EACH_BB_FN (cfg_bb, cfun)
- {
- edge e;
- edge_iterator ei;
-
- /* If the current block is the destination of an abnormal edge, we
- kill all trapping (for PRE) and memory (for hoist) expressions
- because we won't be able to properly place the instruction on
- the edge. So make them neither anticipatable nor transparent.
- This is fairly conservative.
-
- ??? For hoisting it may be necessary to check for set-and-jump
- instructions here, not just for abnormal edges. The general problem
- is that when an expression cannot not be placed right at the end of
- a basic block we should account for any side-effects of a subsequent
- jump instructions that could clobber the expression. It would
- be best to implement this check along the lines of
- should_hoist_expr_to_dom where the target block is already known
- and, hence, there's no need to conservatively prune expressions on
- "intermediate" set-and-jump instructions. */
- FOR_EACH_EDGE (e, ei, cfg_bb->preds)
- if (e->flags & EDGE_COMPLEX)
- {
- bitmap_clear (m_vector_manager->vector_antic[cfg_bb->index]);
- bitmap_clear (m_vector_manager->vector_transp[cfg_bb->index]);
- }
- }
-}
-
-/* Fuse demand info for earliest edge. */
-bool
-pass_vsetvl::earliest_fusion (void)
-{
- bool changed_p = false;
- for (int ed = 0; ed < NUM_EDGES (m_vector_manager->vector_edge_list); ed++)
- {
- for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
- {
- auto &expr = *m_vector_manager->vector_exprs[i];
- if (expr.empty_p ())
- continue;
- edge eg = INDEX_EDGE (m_vector_manager->vector_edge_list, ed);
- /* If it is the edge that we never reach, skip its possible PRE
- fusion conservatively. */
- if (eg->probability == profile_probability::never ())
- break;
- if (eg->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)
- || eg->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
- break;
- if (bitmap_bit_p (m_vector_manager->vector_earliest[ed], i))
- {
- auto &src_block_info = get_block_info (eg->src);
- auto &dest_block_info = get_block_info (eg->dest);
- if (src_block_info.reaching_out.unknown_p ())
- break;
-
- gcc_assert (!(eg->flags & EDGE_ABNORMAL));
- vector_insn_info new_info = vector_insn_info ();
- profile_probability prob = src_block_info.probability;
- /* We don't fuse user vsetvl into EMPTY or
- DIRTY (EMPTY but polluted) block for these
- following reasons:
-
- - The user vsetvl instruction is configured as
- no side effects that the previous passes
- (GSCE, Loop-invariant, ..., etc)
- should be able to do a good job on optimization
- of user explicit vsetvls so we don't need to
- PRE optimization (The user vsetvls should be
- on the optimal local already before this pass)
- again for user vsetvls in VSETVL PASS here
- (Phase 3 && Phase 4).
-
- - Allowing user vsetvls be optimized in PRE
- optimization here (Phase 3 && Phase 4) will
- complicate the codes so much so we prefer user
- vsetvls be optimized in post-optimization
- (Phase 5 && Phase 6). */
- if (vsetvl_insn_p (expr.get_insn ()->rtl ()))
- {
- if (src_block_info.reaching_out.empty_p ())
- continue;
- else if (src_block_info.reaching_out.dirty_p ()
- && !src_block_info.reaching_out.compatible_p (expr))
- {
- new_info.set_empty ();
- /* Update probability as uninitialized status so that
- we won't try to fuse any demand info into such EMPTY
- block any more. */
- prob = profile_probability::uninitialized ();
- update_block_info (eg->src->index, prob, new_info);
- continue;
- }
- }
-
- if (src_block_info.reaching_out.empty_p ())
- {
- if (src_block_info.probability
- == profile_probability::uninitialized ())
- continue;
- new_info = expr.global_merge (expr, eg->src->index);
- new_info.set_dirty ();
- prob = dest_block_info.probability;
- update_block_info (eg->src->index, prob, new_info);
- changed_p = true;
- }
- else if (src_block_info.reaching_out.dirty_p ())
- {
- /* DIRTY -> DIRTY or VALID -> DIRTY. */
- if (demands_can_be_fused_p (src_block_info.reaching_out,
- expr))
- {
- new_info = src_block_info.reaching_out.global_merge (
- expr, eg->src->index);
- new_info.set_dirty ();
- prob += dest_block_info.probability;
- }
- else if (!src_block_info.reaching_out.compatible_p (expr)
- && !m_vector_manager->earliest_fusion_worthwhile_p (
- eg->src))
- {
- new_info.set_empty ();
- prob = profile_probability::uninitialized ();
- }
- else if (!src_block_info.reaching_out.compatible_p (expr)
- && dest_block_info.probability
- > src_block_info.probability)
- {
- new_info = expr;
- new_info.set_dirty ();
- prob = dest_block_info.probability;
- }
- else
- continue;
- update_block_info (eg->src->index, prob, new_info);
- changed_p = true;
- }
- else
- {
- rtx vl = NULL_RTX;
- if (vsetvl_insn_p (
- src_block_info.reaching_out.get_insn ()->rtl ())
- && vsetvl_dominated_by_p (eg->src, expr,
- src_block_info.reaching_out,
- true))
- ;
- else if (!demands_can_be_fused_p (src_block_info.reaching_out,
- expr))
- continue;
- else if (!earliest_pred_can_be_fused_p (
- crtl->ssa->bb (eg->src),
- src_block_info.reaching_out, expr, &vl))
- continue;
-
- vector_insn_info new_info
- = src_block_info.reaching_out.global_merge (expr,
- eg->src->index);
-
- prob = std::max (dest_block_info.probability,
- src_block_info.probability);
- change_vsetvl_insn (new_info.get_insn (), new_info, vl);
- update_block_info (eg->src->index, prob, new_info);
- changed_p = true;
- }
- }
- }
- }
- return changed_p;
-}
-
-/* Fuse VSETVL demand info according LCM computed location. */
-void
-pass_vsetvl::vsetvl_fusion (void)
-{
- /* Fuse VSETVL demand info until VSETVL CFG fixed. */
- bool changed_p = true;
- int fusion_no = 0;
- while (changed_p)
- {
- changed_p = false;
- fusion_no++;
- prune_expressions ();
- m_vector_manager->create_bitmap_vectors ();
- compute_local_properties ();
- /* Compute global availability. */
- compute_available (m_vector_manager->vector_comp,
- m_vector_manager->vector_kill,
- m_vector_manager->vector_avout,
- m_vector_manager->vector_avin);
- /* Compute global anticipatability. */
- compute_antinout_edge (m_vector_manager->vector_antic,
- m_vector_manager->vector_transp,
- m_vector_manager->vector_antin,
- m_vector_manager->vector_antout);
- /* Compute earliestness. */
- compute_earliest (m_vector_manager->vector_edge_list,
- m_vector_manager->vector_exprs.length (),
- m_vector_manager->vector_antin,
- m_vector_manager->vector_antout,
- m_vector_manager->vector_avout,
- m_vector_manager->vector_kill,
- m_vector_manager->vector_earliest);
- changed_p |= earliest_fusion ();
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "\nEARLIEST fusion %d\n", fusion_no);
- m_vector_manager->dump (dump_file);
- }
- m_vector_manager->free_bitmap_vectors ();
- if (!m_vector_manager->vector_exprs.is_empty ())
- m_vector_manager->vector_exprs.release ();
- }
-}
-
-/* Return true if VSETVL in the block can be refined as vsetvl zero,zero. */
-bool
-pass_vsetvl::can_refine_vsetvl_p (const basic_block cfg_bb,
- const vector_insn_info &info) const
-{
- if (!m_vector_manager->all_same_ratio_p (
- m_vector_manager->vector_avin[cfg_bb->index]))
- return false;
-
- if (!m_vector_manager->all_same_avl_p (
- cfg_bb, m_vector_manager->vector_avin[cfg_bb->index]))
- return false;
-
- size_t expr_id
- = bitmap_first_set_bit (m_vector_manager->vector_avin[cfg_bb->index]);
- if (!m_vector_manager->vector_exprs[expr_id]->same_vlmax_p (info))
- return false;
- if (!m_vector_manager->vector_exprs[expr_id]->compatible_avl_p (info))
- return false;
-
- edge e;
- edge_iterator ei;
- bool all_valid_p = true;
- FOR_EACH_EDGE (e, ei, cfg_bb->preds)
- {
- if (bitmap_empty_p (m_vector_manager->vector_avout[e->src->index]))
- {
- all_valid_p = false;
- break;
- }
- }
-
- if (!all_valid_p)
- return false;
- return true;
-}
-
-/* Optimize athe case like this:
-
- bb 0:
- vsetvl 0 a5,zero,e8,mf8
- insn 0 (demand SEW + LMUL)
- bb 1:
- vsetvl 1 a5,zero,e16,mf4
- insn 1 (demand SEW + LMUL)
-
- In this case, we should be able to refine
- vsetvl 1 into vsetvl zero, zero according AVIN. */
-void
-pass_vsetvl::refine_vsetvls (void) const
-{
- basic_block cfg_bb;
- FOR_EACH_BB_FN (cfg_bb, cfun)
- {
- auto info = get_block_info (cfg_bb).local_dem;
- insn_info *insn = info.get_insn ();
- if (!info.valid_p ())
- continue;
-
- rtx_insn *rinsn = insn->rtl ();
- if (!can_refine_vsetvl_p (cfg_bb, info))
- continue;
-
- /* We can't refine user vsetvl into vsetvl zero,zero since the dest
- will be used by the following instructions. */
- if (vector_config_insn_p (rinsn))
- {
- m_vector_manager->to_refine_vsetvls.add (rinsn);
- continue;
- }
-
- /* If all incoming edges to a block have a vector state that is compatbile
- with the block. In such a case we need not emit a vsetvl in the current
- block. */
-
- gcc_assert (has_vtype_op (insn->rtl ()));
- rinsn = PREV_INSN (insn->rtl ());
- gcc_assert (vector_config_insn_p (PREV_INSN (insn->rtl ())));
- if (m_vector_manager->all_avail_in_compatible_p (cfg_bb))
- {
- size_t id = m_vector_manager->get_expr_id (info);
- if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], id))
- continue;
- eliminate_insn (rinsn);
- }
- else
- {
- rtx new_pat
- = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, info, NULL_RTX);
- change_insn (rinsn, new_pat);
- }
- }
-}
-
-void
-pass_vsetvl::cleanup_vsetvls ()
-{
- basic_block cfg_bb;
- FOR_EACH_BB_FN (cfg_bb, cfun)
- {
- auto &info = get_block_info (cfg_bb).reaching_out;
- gcc_assert (m_vector_manager->expr_set_num (
- m_vector_manager->vector_del[cfg_bb->index])
- <= 1);
- for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
- {
- if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], i))
- {
- if (info.dirty_p ())
- info.set_unknown ();
- else
- {
- const auto dem = get_block_info (cfg_bb).local_dem;
- gcc_assert (dem == *m_vector_manager->vector_exprs[i]);
- insn_info *insn = dem.get_insn ();
- gcc_assert (insn && insn->rtl ());
- rtx_insn *rinsn;
- /* We can't eliminate user vsetvl since the dest will be used
- * by the following instructions. */
- if (vector_config_insn_p (insn->rtl ()))
- {
- m_vector_manager->to_delete_vsetvls.add (insn->rtl ());
- continue;
- }
-
- gcc_assert (has_vtype_op (insn->rtl ()));
- rinsn = PREV_INSN (insn->rtl ());
- gcc_assert (vector_config_insn_p (PREV_INSN (insn->rtl ())));
- eliminate_insn (rinsn);
- }
- }
- }
- }
-}
-
-bool
-pass_vsetvl::commit_vsetvls (void)
-{
- bool need_commit = false;
-
- for (int ed = 0; ed < NUM_EDGES (m_vector_manager->vector_edge_list); ed++)
- {
- for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
- {
- edge eg = INDEX_EDGE (m_vector_manager->vector_edge_list, ed);
- if (bitmap_bit_p (m_vector_manager->vector_insert[ed], i))
- {
- const vector_insn_info *require
- = m_vector_manager->vector_exprs[i];
- gcc_assert (require->valid_or_dirty_p ());
- rtl_profile_for_edge (eg);
- start_sequence ();
-
- insn_info *insn = require->get_insn ();
- vector_insn_info prev_info = vector_insn_info ();
- sbitmap bitdata = m_vector_manager->vector_avout[eg->src->index];
- if (m_vector_manager->all_same_ratio_p (bitdata)
- && m_vector_manager->all_same_avl_p (eg->dest, bitdata))
- {
- size_t first = bitmap_first_set_bit (bitdata);
- prev_info = *m_vector_manager->vector_exprs[first];
- }
-
- insert_vsetvl (EMIT_DIRECT, insn->rtl (), *require, prev_info);
- rtx_insn *rinsn = get_insns ();
- end_sequence ();
- default_rtl_profile ();
-
- /* We should not get an abnormal edge here. */
- gcc_assert (!(eg->flags & EDGE_ABNORMAL));
- need_commit = true;
- insert_insn_on_edge (rinsn, eg);
-
- if (dump_file)
- {
- fprintf (dump_file,
- "\nInsert vsetvl insn %d at edge %d from <bb %d> to "
- "<bb %d>:\n",
- INSN_UID (rinsn), ed, eg->src->index,
- eg->dest->index);
- print_rtl_single (dump_file, rinsn);
- }
- }
- }
- }
-
- for (const bb_info *bb : crtl->ssa->bbs ())
- {
- basic_block cfg_bb = bb->cfg_bb ();
- const auto reaching_out = get_block_info (cfg_bb).reaching_out;
- if (!reaching_out.dirty_p ())
- continue;
-
- rtx new_pat;
- if (!reaching_out.demand_p (DEMAND_AVL))
- {
- vl_vtype_info new_info = reaching_out;
- new_info.set_avl_info (avl_info (const0_rtx, nullptr));
- new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, new_info, NULL_RTX);
- }
- else if (can_refine_vsetvl_p (cfg_bb, reaching_out))
- new_pat
- = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, reaching_out, NULL_RTX);
- else if (vlmax_avl_p (reaching_out.get_avl ()))
- {
- rtx vl = reaching_out.get_avl_or_vl_reg ();
- new_pat = gen_vsetvl_pat (VSETVL_NORMAL, reaching_out, vl);
- }
- else
- new_pat
- = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, reaching_out, NULL_RTX);
-
- edge eg;
- edge_iterator eg_iterator;
- FOR_EACH_EDGE (eg, eg_iterator, cfg_bb->succs)
- {
- /* We should not get an abnormal edge here. */
- gcc_assert (!(eg->flags & EDGE_ABNORMAL));
- /* We failed to optimize this case in Phase 3 (earliest fusion):
-
- bb 2: vsetvl a5, a3 ...
- goto bb 4
- bb 3: vsetvl a5, a2 ...
- goto bb 4
- bb 4: vsetvli zero, a5 ---> Redundant, should be elided.
-
- Since "a5" value can come from either bb 2 or bb 3, we can't make
- it optimized in Phase 3 which will make phase 3 so complicated.
- Now, we do post optimization here to elide the redundant VSETVL
- insn in bb4. */
- if (m_vector_manager->vsetvl_dominated_by_all_preds_p (cfg_bb,
- reaching_out))
- continue;
-
- start_sequence ();
- emit_insn (copy_rtx (new_pat));
- rtx_insn *rinsn = get_insns ();
- end_sequence ();
-
- insert_insn_on_edge (rinsn, eg);
- need_commit = true;
- if (dump_file)
- {
- fprintf (dump_file,
- "\nInsert vsetvl insn %d from <bb %d> to <bb %d>:\n",
- INSN_UID (rinsn), cfg_bb->index, eg->dest->index);
- print_rtl_single (dump_file, rinsn);
- }
- }
- }
-
- return need_commit;
-}
-
-void
-pass_vsetvl::pre_vsetvl (void)
-{
- /* Compute entity list. */
- prune_expressions ();
-
- m_vector_manager->create_bitmap_vectors ();
- compute_local_properties ();
- m_vector_manager->vector_edge_list = pre_edge_lcm_avs (
- m_vector_manager->vector_exprs.length (), m_vector_manager->vector_transp,
- m_vector_manager->vector_comp, m_vector_manager->vector_antic,
- m_vector_manager->vector_kill, m_vector_manager->vector_avin,
- m_vector_manager->vector_avout, &m_vector_manager->vector_insert,
- &m_vector_manager->vector_del);
-
- /* We should dump the information before CFG is changed. Otherwise it will
- produce ICE (internal compiler error). */
- if (dump_file && (dump_flags & TDF_DETAILS))
- m_vector_manager->dump (dump_file);
-
- refine_vsetvls ();
- cleanup_vsetvls ();
- bool need_commit = commit_vsetvls ();
- if (need_commit)
- commit_edge_insertions ();
-}
-
/* Some instruction can not be accessed in RTL_SSA when we don't re-init
the new RTL_SSA framework but it is definetely at the END of the block.