new file mode 100644
@@ -0,0 +1,63 @@
+;; Machine description for auto-vectorization using RVV for GNU compiler.
+;; Copyright (C) 2023 Free Software Foundation, Inc.
+;; Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; =========================================================================
+;; == While_len
+;; =========================================================================
+
+(define_expand "while_len<mode>"
+ [(match_operand:P 0 "register_operand")
+ (match_operand:P 1 "vector_length_operand")
+ (match_operand:P 2 "")]
+ "TARGET_VECTOR"
+{
+ riscv_vector::expand_while_len (operands);
+ DONE;
+})
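+
+;; A sketch of the intended semantics (illustrative only): operand 0 is set
+;; to the number of elements to be processed in this iteration, roughly
+;;   op0 = umin (op1, op2),
+;; where operand 1 is the remaining iteration count and operand 2 the (poly)
+;; number of elements per vector.  The preferred expansion is a single
+;; vsetvli, e.g. (illustrative operands only):
+;;   vsetvli a0, a1, e8, m1, ta, ma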
+
+;; =========================================================================
+;; == Loads/Stores
+;; =========================================================================
+
+;; len_load/len_store are sub-optimal patterns for RVV auto-vectorization support.
+;; We will replace them once len_maskload/len_maskstore are supported in the loop vectorizer.
+(define_expand "len_load_<mode>"
+ [(match_operand:V 0 "register_operand")
+ (match_operand:V 1 "memory_operand")
+ (match_operand 2 "vector_length_operand")
+ (match_operand 3 "const_0_operand")]
+ "TARGET_VECTOR"
+{
+ riscv_vector::emit_nonvlmax_op (code_for_pred_mov (<MODE>mode), operands[0],
+ operands[1], operands[2], <VM>mode);
+ DONE;
+})
+
+(define_expand "len_store_<mode>"
+ [(match_operand:V 0 "memory_operand")
+ (match_operand:V 1 "register_operand")
+ (match_operand 2 "vector_length_operand")
+ (match_operand 3 "const_0_operand")]
+ "TARGET_VECTOR"
+{
+ riscv_vector::emit_nonvlmax_op (code_for_pred_mov (<MODE>mode), operands[0],
+ operands[1], operands[2], <VM>mode);
+ DONE;
+})
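+
+;; A sketch of the expected assembly for a len_load/len_store pair with the
+;; active length in a2 (illustrative registers and vtype only):
+;;   vsetvli zero, a2, e32, m1, ta, ma
+;;   vle32.v v1, (a0)
+;;   vse32.v v1, (a1)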
@@ -67,6 +67,22 @@ enum stack_protector_guard {
SSP_GLOBAL /* global canary */
};
+/* RISC-V auto-vectorization preference. */
+enum riscv_autovec_preference_enum {
+ NO_AUTOVEC,
+ RVV_SCALABLE,
+ RVV_FIXED_VLMIN,
+ RVV_FIXED_VLMAX
+};
+
+/* RISC-V auto-vectorization RVV LMUL. */
+enum riscv_autovec_lmul_enum {
+ RVV_M1 = 1,
+ RVV_M2 = 2,
+ RVV_M4 = 4,
+ RVV_M8 = 8
+};
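+
+/* These values are selected with --param=riscv-autovec-preference= and
+   --param=riscv-autovec-lmul= respectively; see riscv.opt.  */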
+
#define MASK_ZICSR (1 << 0)
#define MASK_ZIFENCEI (1 << 1)
@@ -184,7 +184,6 @@ enum mask_policy
enum tail_policy get_prefer_tail_policy ();
enum mask_policy get_prefer_mask_policy ();
rtx get_avl_type_rtx (enum avl_type);
-opt_machine_mode get_vector_mode (scalar_mode, poly_uint64);
bool simm5_p (rtx);
bool neg_simm5_p (rtx);
#ifdef RTX_CODE
@@ -206,6 +205,8 @@ enum vlen_enum
bool slide1_sew64_helper (int, machine_mode, machine_mode,
machine_mode, rtx *);
rtx gen_avl_for_scalar_move (rtx);
+machine_mode preferred_simd_mode (scalar_mode);
+void expand_while_len (rtx *);
}
/* We classify builtin types into two classes:
@@ -43,6 +43,7 @@
#include "optabs.h"
#include "tm-constrs.h"
#include "rtx-vector-builder.h"
+#include "targhooks.h"
using namespace riscv_vector;
@@ -424,7 +425,7 @@ get_avl_type_rtx (enum avl_type type)
/* Return the RVV vector mode that has NUNITS elements of mode INNER_MODE.
This function is not only used by builtins, but also will be used by
auto-vectorization in the future. */
-opt_machine_mode
+static opt_machine_mode
get_vector_mode (scalar_mode inner_mode, poly_uint64 nunits)
{
enum mode_class mclass;
@@ -729,4 +730,62 @@ gen_avl_for_scalar_move (rtx avl)
}
}
+/* SCALABLE means that the vector length is agnostic (run-time invariant and
+   compile-time unknown).  FIXED means that the vector length is specific
+   (compile-time known).  Both RVV_SCALABLE and RVV_FIXED_VLMAX perform
+   auto-vectorization using the VLMAX vsetvl configuration.  */
+static bool
+autovec_use_vlmax_p (void)
+{
+ return riscv_autovec_preference == RVV_SCALABLE
+ || riscv_autovec_preference == RVV_FIXED_VLMAX;
+}
+
+/* Return the vectorization machine mode for RVV according to LMUL. */
+machine_mode
+preferred_simd_mode (scalar_mode mode)
+{
+ if (autovec_use_vlmax_p ())
+ {
+      /* We use LMUL = 1 as the base size, which is BYTES_PER_RISCV_VECTOR,
+	 and riscv_autovec_lmul as the multiplication factor to calculate the
+	 NUNITS of the auto-vectorization mode.  */
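+      /* A worked example (a sketch, assuming a 64-bit chunk so that
+	 BYTES_PER_RISCV_VECTOR == [8, 8], with --param=riscv-autovec-lmul=m2):
+	   vector_size == [8, 8] * 2 == [16, 16] bytes;
+	   for SImode, nunits == [16, 16] / 4 == [4, 4] -> VNx4SImode.  */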
+ poly_uint64 nunits;
+ poly_uint64 vector_size
+ = BYTES_PER_RISCV_VECTOR * ((int) riscv_autovec_lmul);
+ poly_uint64 scalar_size = GET_MODE_SIZE (mode);
+ if (!multiple_p (vector_size, scalar_size, &nunits))
+ return word_mode;
+ machine_mode rvv_mode;
+ if (get_vector_mode (mode, nunits).exists (&rvv_mode))
+ return rvv_mode;
+ }
+  /* TODO: We will support minimum-length VLS auto-vectorization in the
+     future.  */
+ return word_mode;
+}
+
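+/* Expand the while_len pattern from autovec.md: set ops[0] to the number of
+   elements to be processed in this iteration, i.e. roughly
+   umin (ops[1], ops[2]), where ops[2] is the (poly) number of elements per
+   vector.  We prefer emitting a vsetvl and letting the VSETVL pass optimize
+   the configuration.  */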
+void
+expand_while_len (rtx *ops)
+{
+ poly_int64 nunits;
+ gcc_assert (poly_int_rtx_p (ops[2], &nunits));
+  /* We arbitrarily picked QImode as the inner scalar mode to get a vector
+     mode, since vsetvl only demands the SEW/LMUL ratio.  We let the VSETVL
+     PASS optimize it.  */
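+  /* E.g. (a sketch): nunits == [4, 4] yields a vsetvl on VNx4QImode; if no
+     RVV mode with that many units exists, we fall back to the explicit
+     umin below.  */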
+ scalar_int_mode mode = QImode;
+ machine_mode rvv_mode;
+ if (get_vector_mode (mode, nunits).exists (&rvv_mode))
+ {
+ rtx vsetvl_rtx
+ = gen_no_side_effects_vsetvl_rtx (rvv_mode, ops[0], ops[1]);
+ emit_insn (vsetvl_rtx);
+ }
+  else
+    {
+      rtx tmp = gen_reg_rtx (Pmode);
+      emit_move_insn (tmp, gen_int_mode (nunits, Pmode));
+      /* expand_binop may return a register other than the suggested target,
+	 so copy the result into ops[0] if needed.  */
+      rtx res = expand_binop (Pmode, umin_optab, tmp, ops[1], ops[0], true,
+			      OPTAB_LIB);
+      if (res != ops[0])
+	emit_move_insn (ops[0], res);
+    }
+}
+
} // namespace riscv_vector
@@ -121,37 +121,43 @@ TODO: FP16 vector needs support of 'zvfh', we don't support it yet. */
/* Mask modes. Disable VNx128BI when TARGET_MIN_VLEN < 128. */
/* Mask modes. Disable VNx64BImode when TARGET_MIN_VLEN == 32. */
/* Mask modes. Disable VNx1BImode when TARGET_MIN_VLEN >= 128. */
-ENTRY (VNx128BI, TARGET_MIN_VLEN >= 128, LMUL_RESERVED, 0, LMUL_RESERVED, 0, LMUL_8, 1)
+ENTRY (VNx128BI, TARGET_MIN_VLEN >= 128, LMUL_RESERVED, 0, LMUL_RESERVED, 0,
+ LMUL_8, 1)
ENTRY (VNx64BI, TARGET_MIN_VLEN > 32, LMUL_RESERVED, 0, LMUL_8, 1, LMUL_4, 2)
ENTRY (VNx32BI, true, LMUL_8, 1, LMUL_4, 2, LMUL_2, 4)
ENTRY (VNx16BI, true, LMUL_4, 2, LMUL_2, 4, LMUL_1, 8)
ENTRY (VNx8BI, true, LMUL_2, 4, LMUL_1, 8, LMUL_F2, 16)
ENTRY (VNx4BI, true, LMUL_1, 8, LMUL_F2, 16, LMUL_F4, 32)
ENTRY (VNx2BI, true, LMUL_F2, 16, LMUL_F4, 32, LMUL_F8, 64)
-ENTRY (VNx1BI, TARGET_MIN_VLEN < 128, LMUL_F4, 32, LMUL_F8, 64, LMUL_RESERVED, 0)
+ENTRY (VNx1BI, TARGET_MIN_VLEN < 128, LMUL_F4, 32, LMUL_F8, 64, LMUL_RESERVED,
+ 0)
/* SEW = 8. Disable VNx128QImode when TARGET_MIN_VLEN < 128. */
/* SEW = 8. Disable VNx64QImode when TARGET_MIN_VLEN == 32. */
/* SEW = 8. Disable VNx1QImode when TARGET_MIN_VLEN >= 128. */
-ENTRY (VNx128QI, TARGET_MIN_VLEN >= 128, LMUL_RESERVED, 0, LMUL_RESERVED, 0, LMUL_8, 1)
+ENTRY (VNx128QI, TARGET_MIN_VLEN >= 128, LMUL_RESERVED, 0, LMUL_RESERVED, 0,
+ LMUL_8, 1)
ENTRY (VNx64QI, TARGET_MIN_VLEN > 32, LMUL_RESERVED, 0, LMUL_8, 1, LMUL_4, 2)
ENTRY (VNx32QI, true, LMUL_8, 1, LMUL_4, 2, LMUL_2, 4)
ENTRY (VNx16QI, true, LMUL_4, 2, LMUL_2, 4, LMUL_1, 8)
ENTRY (VNx8QI, true, LMUL_2, 4, LMUL_1, 8, LMUL_F2, 16)
ENTRY (VNx4QI, true, LMUL_1, 8, LMUL_F2, 16, LMUL_F4, 32)
ENTRY (VNx2QI, true, LMUL_F2, 16, LMUL_F4, 32, LMUL_F8, 64)
-ENTRY (VNx1QI, TARGET_MIN_VLEN < 128, LMUL_F4, 32, LMUL_F8, 64, LMUL_RESERVED, 0)
+ENTRY (VNx1QI, TARGET_MIN_VLEN < 128, LMUL_F4, 32, LMUL_F8, 64, LMUL_RESERVED,
+ 0)
/* SEW = 16. Disable VNx64HImode when TARGET_MIN_VLEN < 128. */
/* SEW = 16. Disable VNx32HImode when TARGET_MIN_VLEN == 32. */
/* SEW = 16. Disable VNx1HImode when TARGET_MIN_VLEN >= 128. */
-ENTRY (VNx64HI, TARGET_MIN_VLEN >= 128, LMUL_RESERVED, 0, LMUL_RESERVED, 0, LMUL_8, 2)
+ENTRY (VNx64HI, TARGET_MIN_VLEN >= 128, LMUL_RESERVED, 0, LMUL_RESERVED, 0,
+ LMUL_8, 2)
ENTRY (VNx32HI, TARGET_MIN_VLEN > 32, LMUL_RESERVED, 0, LMUL_8, 2, LMUL_4, 4)
ENTRY (VNx16HI, true, LMUL_8, 2, LMUL_4, 4, LMUL_2, 8)
ENTRY (VNx8HI, true, LMUL_4, 4, LMUL_2, 8, LMUL_1, 16)
ENTRY (VNx4HI, true, LMUL_2, 8, LMUL_1, 16, LMUL_F2, 32)
ENTRY (VNx2HI, true, LMUL_1, 16, LMUL_F2, 32, LMUL_F4, 64)
-ENTRY (VNx1HI, TARGET_MIN_VLEN < 128, LMUL_F2, 32, LMUL_F4, 64, LMUL_RESERVED, 0)
+ENTRY (VNx1HI, TARGET_MIN_VLEN < 128, LMUL_F2, 32, LMUL_F4, 64, LMUL_RESERVED,
+ 0)
/* TODO: Disable all FP16 vectors, enable them when 'zvfh' is supported. */
ENTRY (VNx64HF, false, LMUL_RESERVED, 0, LMUL_RESERVED, 0, LMUL_8, 2)
@@ -167,38 +173,45 @@ ENTRY (VNx1HF, false, LMUL_F2, 32, LMUL_F4, 64, LMUL_RESERVED, 0)
For single-precision floating-point, we need TARGET_VECTOR_FP32 ==
RVV_ENABLE. */
/* SEW = 32. Disable VNx1SImode/VNx1SFmode when TARGET_MIN_VLEN >= 128. */
-ENTRY (VNx32SI, TARGET_MIN_VLEN >= 128, LMUL_RESERVED, 0, LMUL_RESERVED, 0, LMUL_8, 4)
+ENTRY (VNx32SI, TARGET_MIN_VLEN >= 128, LMUL_RESERVED, 0, LMUL_RESERVED, 0,
+ LMUL_8, 4)
ENTRY (VNx16SI, TARGET_MIN_VLEN > 32, LMUL_RESERVED, 0, LMUL_8, 4, LMUL_4, 8)
ENTRY (VNx8SI, true, LMUL_8, 4, LMUL_4, 8, LMUL_2, 16)
ENTRY (VNx4SI, true, LMUL_4, 8, LMUL_2, 16, LMUL_1, 32)
ENTRY (VNx2SI, true, LMUL_2, 16, LMUL_1, 32, LMUL_F2, 64)
ENTRY (VNx1SI, TARGET_MIN_VLEN < 128, LMUL_1, 32, LMUL_F2, 64, LMUL_RESERVED, 0)
-ENTRY (VNx32SF, TARGET_VECTOR_FP32 && (TARGET_MIN_VLEN >= 128), LMUL_RESERVED, 0, LMUL_RESERVED, 0, LMUL_8, 4)
+ENTRY (VNx32SF, TARGET_VECTOR_FP32 && (TARGET_MIN_VLEN >= 128), LMUL_RESERVED,
+ 0, LMUL_RESERVED, 0, LMUL_8, 4)
ENTRY (VNx16SF, TARGET_VECTOR_FP32 && (TARGET_MIN_VLEN > 32), LMUL_RESERVED, 0,
LMUL_8, 4, LMUL_4, 8)
ENTRY (VNx8SF, TARGET_VECTOR_FP32, LMUL_8, 4, LMUL_4, 8, LMUL_2, 16)
ENTRY (VNx4SF, TARGET_VECTOR_FP32, LMUL_4, 8, LMUL_2, 16, LMUL_1, 32)
ENTRY (VNx2SF, TARGET_VECTOR_FP32, LMUL_2, 16, LMUL_1, 32, LMUL_F2, 64)
-ENTRY (VNx1SF, TARGET_VECTOR_FP32 && TARGET_MIN_VLEN < 128, LMUL_1, 32, LMUL_F2, 64, LMUL_RESERVED, 0)
+ENTRY (VNx1SF, TARGET_VECTOR_FP32 && TARGET_MIN_VLEN < 128, LMUL_1, 32, LMUL_F2,
+ 64, LMUL_RESERVED, 0)
/* SEW = 64. Disable VNx16DImode/VNx16DFmode when TARGET_MIN_VLEN < 128. */
/* SEW = 64. Enable VNx8DImode/VNx8DFmode when TARGET_MIN_VLEN > 32.
For double-precision floating-point, we need TARGET_VECTOR_FP64 ==
RVV_ENABLE. */
/* SEW = 64. Disable VNx1DImode/VNx1DFmode when TARGET_MIN_VLEN >= 128. */
-ENTRY (VNx16DI, TARGET_MIN_VLEN >= 128, LMUL_RESERVED, 0, LMUL_RESERVED, 0, LMUL_8, 8)
-ENTRY (VNx8DI, TARGET_MIN_VLEN > 32, LMUL_RESERVED, 0, LMUL_8, 8, LMUL_4, 16)
-ENTRY (VNx4DI, TARGET_MIN_VLEN > 32, LMUL_RESERVED, 0, LMUL_4, 16, LMUL_2, 32)
-ENTRY (VNx2DI, TARGET_MIN_VLEN > 32, LMUL_RESERVED, 0, LMUL_2, 32, LMUL_1, 64)
-ENTRY (VNx1DI, TARGET_MIN_VLEN > 32 && TARGET_MIN_VLEN < 128, LMUL_RESERVED, 0, LMUL_1, 64, LMUL_RESERVED, 0)
-
-ENTRY (VNx16DF, TARGET_VECTOR_FP64 && (TARGET_MIN_VLEN >= 128), LMUL_RESERVED, 0, LMUL_RESERVED, 0, LMUL_8, 8)
+ENTRY (VNx16DI, TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128, LMUL_RESERVED,
+ 0, LMUL_RESERVED, 0, LMUL_8, 8)
+ENTRY (VNx8DI, TARGET_VECTOR_ELEN_64, LMUL_RESERVED, 0, LMUL_8, 8, LMUL_4, 16)
+ENTRY (VNx4DI, TARGET_VECTOR_ELEN_64, LMUL_RESERVED, 0, LMUL_4, 16, LMUL_2, 32)
+ENTRY (VNx2DI, TARGET_VECTOR_ELEN_64, LMUL_RESERVED, 0, LMUL_2, 32, LMUL_1, 64)
+ENTRY (VNx1DI, TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN < 128, LMUL_RESERVED, 0,
+ LMUL_1, 64, LMUL_RESERVED, 0)
+
+ENTRY (VNx16DF, TARGET_VECTOR_FP64 && (TARGET_MIN_VLEN >= 128), LMUL_RESERVED,
+ 0, LMUL_RESERVED, 0, LMUL_8, 8)
ENTRY (VNx8DF, TARGET_VECTOR_FP64 && (TARGET_MIN_VLEN > 32), LMUL_RESERVED, 0,
LMUL_8, 8, LMUL_4, 16)
ENTRY (VNx4DF, TARGET_VECTOR_FP64, LMUL_RESERVED, 0, LMUL_4, 16, LMUL_2, 32)
ENTRY (VNx2DF, TARGET_VECTOR_FP64, LMUL_RESERVED, 0, LMUL_2, 32, LMUL_1, 64)
-ENTRY (VNx1DF, TARGET_VECTOR_FP64 && TARGET_MIN_VLEN < 128, LMUL_RESERVED, 0, LMUL_1, 64, LMUL_RESERVED, 0)
+ENTRY (VNx1DF, TARGET_VECTOR_FP64 && TARGET_MIN_VLEN < 128, LMUL_RESERVED, 0,
+ LMUL_1, 64, LMUL_RESERVED, 0)
#undef TARGET_VECTOR_FP32
#undef TARGET_VECTOR_FP64
@@ -532,6 +532,43 @@ get_all_predecessors (basic_block cfg_bb)
return blocks;
}
+/* Recursively find all successor blocks for cfg_bb. */
+static hash_set<basic_block>
+get_all_successors (basic_block cfg_bb)
+{
+ hash_set<basic_block> blocks;
+ auto_vec<basic_block> work_list;
+ hash_set<basic_block> visited_list;
+ work_list.safe_push (cfg_bb);
+
+ while (!work_list.is_empty ())
+ {
+ basic_block new_cfg_bb = work_list.pop ();
+ visited_list.add (new_cfg_bb);
+ edge e;
+ edge_iterator ei;
+ FOR_EACH_EDGE (e, ei, new_cfg_bb->succs)
+ {
+ if (!visited_list.contains (e->dest))
+ work_list.safe_push (e->dest);
+ blocks.add (e->dest);
+ }
+ }
+ return blocks;
+}
+
+/* Return the blocks that occur in both sets, i.e. their intersection.  */
+static hash_set<basic_block>
+get_all_overlap_blocks (const hash_set<basic_block> &blocks1,
+			const hash_set<basic_block> &blocks2)
+{
+ hash_set<basic_block> blocks;
+ for (const auto &block : blocks1)
+ if (blocks2.contains (block))
+ blocks.add (block);
+ return blocks;
+}
+
/* Return true if there is an INSN in insns staying in the block BB. */
static bool
any_set_in_bb_p (hash_set<set_info *> sets, const bb_info *bb)
@@ -1054,6 +1091,51 @@ change_vsetvl_insn (const insn_info *insn, const vector_insn_info &info)
change_insn (rinsn, new_pat);
}
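+
+/* Try to locally eliminate a redundant user vsetvl: if a
+   "vsetvl zero,a5,..." immediately precedes a vector insn in the same
+   basic block as the "vsetvl a5,a6,..." that defines its AVL, merge the
+   demand into the first vsetvl and delete the second.  A sketch
+   (illustrative operands only):
+
+     vsetvli a5,a6,e32,m1,ta,ma          vsetvli a5,a6,e32,m1,ta,ma
+     vsetvli zero,a5,e32,m1,ta,ma  -->   (deleted)
+     vadd.vv v1,v2,v3                    vadd.vv v1,v2,v3  */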
+static void
+local_eliminate_vsetvl_insn (const vector_insn_info &dem)
+{
+ const insn_info *insn = dem.get_insn ();
+ if (!insn || insn->is_artificial ())
+ return;
+ rtx_insn *rinsn = insn->rtl ();
+ const bb_info *bb = insn->bb ();
+ if (vsetvl_insn_p (rinsn))
+ {
+ rtx vl = get_vl (rinsn);
+ for (insn_info *i = insn->next_nondebug_insn ();
+ real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ())
+ {
+ if (i->is_call () || i->is_asm ()
+ || find_access (i->defs (), VL_REGNUM)
+ || find_access (i->defs (), VTYPE_REGNUM))
+ return;
+
+ if (has_vtype_op (i->rtl ()))
+ {
+ if (!vsetvl_discard_result_insn_p (PREV_INSN (i->rtl ())))
+ return;
+ rtx avl = get_avl (i->rtl ());
+ if (avl != vl)
+ return;
+ set_info *def = find_access (i->uses (), REGNO (avl))->def ();
+ if (def->insn () != insn)
+ return;
+
+ vector_insn_info new_info;
+ new_info.parse_insn (i);
+ if (!new_info.skip_avl_compatible_p (dem))
+ return;
+
+ new_info.set_avl_info (dem.get_avl_info ());
+ new_info = dem.merge (new_info, LOCAL_MERGE);
+ change_vsetvl_insn (insn, new_info);
+ eliminate_insn (PREV_INSN (i->rtl ()));
+ return;
+ }
+ }
+ }
+}
+
static bool
source_equal_p (insn_info *insn1, insn_info *insn2)
{
@@ -1984,6 +2066,19 @@ vector_insn_info::compatible_p (const vector_insn_info &other) const
return true;
}
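+
+/* Return true if *this and OTHER are compatible in every demanded field
+   except possibly the AVL; the loop below starts at index 1, skipping the
+   first entry of incompatible_conds, which we take to encode the AVL
+   condition.  */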
+bool
+vector_insn_info::skip_avl_compatible_p (const vector_insn_info &other) const
+{
+ gcc_assert (valid_or_dirty_p () && other.valid_or_dirty_p ()
+ && "Can't compare invalid demanded infos");
+ unsigned array_size = sizeof (incompatible_conds) / sizeof (demands_cond);
+ /* Bypass AVL incompatible cases. */
+ for (unsigned i = 1; i < array_size; i++)
+ if (incompatible_conds[i].dual_incompatible_p (*this, other))
+ return false;
+ return true;
+}
+
bool
vector_insn_info::compatible_avl_p (const vl_vtype_info &other) const
{
@@ -2178,7 +2273,7 @@ vector_insn_info::fuse_mask_policy (const vector_insn_info &info1,
vector_insn_info
vector_insn_info::merge (const vector_insn_info &merge_info,
- enum merge_type type = LOCAL_MERGE) const
+ enum merge_type type) const
{
if (!vsetvl_insn_p (get_insn ()->rtl ()))
gcc_assert (this->compatible_p (merge_info)
@@ -2642,6 +2737,7 @@ private:
void pre_vsetvl (void);
/* Phase 5. */
+ bool global_eliminate_vsetvl_p (const bb_info *) const;
void cleanup_insns (void) const;
/* Phase 6. */
@@ -2716,7 +2812,7 @@ pass_vsetvl::compute_local_backward_infos (const bb_info *bb)
&& !reg_available_p (insn, change))
&& change.compatible_p (info))
{
- info = change.merge (info);
+ info = change.merge (info, LOCAL_MERGE);
/* Fix PR109399, we should update user vsetvl instruction
if there is a change in demand fusion. */
if (vsetvl_insn_p (insn->rtl ()))
@@ -3990,14 +4086,124 @@ pass_vsetvl::pre_vsetvl (void)
commit_edge_insertions ();
}
+/* Eliminate a VSETVL insn whose AVL has multiple sources.  We don't let
+   LCM do that, since it's quite complicated and may be buggy in some
+   situations.  */
+bool
+pass_vsetvl::global_eliminate_vsetvl_p (const bb_info *bb) const
+{
+ const auto &dem
+ = m_vector_manager->vector_block_infos[bb->index ()].local_dem;
+ if (!dem.valid_p ())
+ return false;
+ if (dem.get_insn ()->is_artificial ())
+ return false;
+
+ insn_info *insn = dem.get_insn ();
+ if (!has_vtype_op (insn->rtl ()))
+ return false;
+
+ rtx_insn *prev_rinsn = PREV_INSN (insn->rtl ());
+ if (!prev_rinsn)
+ return false;
+ if (!vsetvl_discard_result_insn_p (prev_rinsn))
+ return false;
+
+ if (!dem.has_avl_reg ())
+ return false;
+ rtx avl = dem.get_avl ();
+ set_info *def = find_access (insn->uses (), REGNO (avl))->def ();
+ hash_set<set_info *> sets = get_all_sets (def, true, true, true);
+ if (sets.is_empty ())
+ return false;
+
+ sbitmap avin = m_vector_manager->vector_avin[bb->index ()];
+ if (!bitmap_empty_p (avin))
+ return false;
+
+ hash_set<basic_block> pred_cfg_bbs = get_all_predecessors (bb->cfg_bb ());
+ auto_vec<vector_insn_info> vsetvl_infos;
+ for (const auto &set : sets)
+ {
+ if (set->insn ()->is_artificial ())
+ return false;
+ insn_info *set_insn = set->insn ();
+ if (!vsetvl_insn_p (set_insn->rtl ()))
+ return false;
+ vector_insn_info vsetvl_info;
+ vsetvl_info.parse_insn (set_insn);
+ if (!vsetvl_info.skip_avl_compatible_p (dem))
+ return false;
+
+ /* Make sure there is no other vsetvl from set_bb to bb. */
+ hash_set<basic_block> succ_cfg_bbs
+ = get_all_successors (set->insn ()->bb ()->cfg_bb ());
+ hash_set<basic_block> overlap_cfg_bbs
+ = get_all_overlap_blocks (pred_cfg_bbs, succ_cfg_bbs);
+ for (const auto &overlap_cfg_bb : overlap_cfg_bbs)
+ {
+ unsigned int index = overlap_cfg_bb->index;
+ if (index == bb->index ())
+ continue;
+ const auto &overlap_dem
+ = m_vector_manager->vector_block_infos[index].local_dem;
+	  /* TODO: Currently, we only optimize a user vsetvl when all of the
+	     overlap blocks are empty.
+
+	     We could instead check accurately that no instruction in the
+	     overlap blocks modifies VL/VTYPE.  */
+ if (!overlap_dem.empty_p ())
+ return false;
+ }
+ vsetvl_infos.safe_push (vsetvl_info);
+ }
+
+  /* Update the VTYPE of each SET vsetvl instruction.  */
+ for (const auto &vsetvl_info : vsetvl_infos)
+ {
+ vector_insn_info info = dem;
+ info.set_avl_info (vsetvl_info.get_avl_info ());
+ info = vsetvl_info.merge (info, LOCAL_MERGE);
+ insn_info *vsetvl_insn = vsetvl_info.get_insn ();
+ change_vsetvl_insn (vsetvl_insn, info);
+ }
+
+ return true;
+}
+
void
pass_vsetvl::cleanup_insns (void) const
{
for (const bb_info *bb : crtl->ssa->bbs ())
{
+      /* Eliminate a global vsetvl:
+	   bb 0:
+	     vsetvl a5,zero,...
+	   bb 1:
+	     vsetvl a5,a6,...
+
+	   bb 2:
+	     vsetvl zero,a5,...
+
+	 Eliminate the vsetvl in bb 2 when a5 comes only from
+	 bb 0 and bb 1.  */
+ const auto &local_dem
+ = m_vector_manager->vector_block_infos[bb->index ()].local_dem;
+ if (global_eliminate_vsetvl_p (bb))
+ eliminate_insn (PREV_INSN (local_dem.get_insn ()->rtl ()));
+
for (insn_info *insn : bb->real_nondebug_insns ())
{
rtx_insn *rinsn = insn->rtl ();
+ const auto &dem = m_vector_manager->vector_insn_infos[insn->uid ()];
+	  /* Eliminate a local vsetvl:
+	       bb 0:
+		 vsetvl a5,a6,...
+		 vsetvl zero,a5,...
+
+	     Eliminate the second vsetvl when a5 comes only from the
+	     first vsetvl within bb 0.  */
+ local_eliminate_vsetvl_insn (dem);
if (vlmax_avl_insn_p (rinsn))
{
@@ -380,6 +380,7 @@ public:
void fuse_mask_policy (const vector_insn_info &, const vector_insn_info &);
bool compatible_p (const vector_insn_info &) const;
+ bool skip_avl_compatible_p (const vector_insn_info &) const;
bool compatible_avl_p (const vl_vtype_info &) const;
bool compatible_avl_p (const avl_info &) const;
bool compatible_vtype_p (const vl_vtype_info &) const;
@@ -6217,7 +6217,15 @@ riscv_convert_vector_bits (void)
to set the RVV mode size.  The RVV machine mode sizes are run-time
constants if TARGET_VECTOR is enabled, and remain the default compile-time
constants if TARGET_VECTOR is disabled. */
- return TARGET_VECTOR ? poly_uint16 (1, 1) : 1;
+ if (TARGET_VECTOR)
+ {
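+      /* A sketch of the FIXED_VLMAX case: with
+	 --param=riscv-autovec-preference=fixed-vlmax, TARGET_MIN_VLEN == 128
+	 and riscv_bytes_per_vector_chunk == 8, the vector size becomes the
+	 compile-time constant 128 / (8 * 8) == 2 chunks.  */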
+ if (riscv_autovec_preference == RVV_FIXED_VLMAX)
+ return (int) TARGET_MIN_VLEN / (riscv_bytes_per_vector_chunk * 8);
+ else
+ return poly_uint16 (1, 1);
+ }
+ else
+ return 1;
}
/* Implement TARGET_OPTION_OVERRIDE. */
@@ -7076,6 +7084,27 @@ riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask)
return shamt == ctz_hwi (mask);
}
+/* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */
+
+static machine_mode
+riscv_preferred_simd_mode (scalar_mode mode)
+{
+  /* We only enable auto-vectorization when TARGET_MIN_VLEN >= 128, which is
+     the case for -march=rv64gcv, since the GCC loop vectorizer reports an
+     ICE in tree-vect-slp.cc:437 when we enable -march=rv64gc_zve32* or
+     -march=rv32gc_zve64x.  Since we have VNx1SImode in -march=*zve32* and
+     VNx1DImode in -march=*zve64*, they are enabled in
+     targetm.vector_mode_supported_p and the SLP vectorizer will try to use
+     them.  Currently, we support auto-vectorization with
+     -march=rv32_zve32x_zvl128b, whereas -march=rv32_zve32x_zvl32b and
+     -march=rv32_zve32x_zvl64b are disabled.  */
+ if (TARGET_VECTOR && TARGET_MIN_VLEN >= 128)
+ return riscv_vector::preferred_simd_mode (mode);
+
+ return word_mode;
+}
+
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -7327,6 +7356,9 @@ riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask)
#undef TARGET_DWARF_POLY_INDETERMINATE_VALUE
#define TARGET_DWARF_POLY_INDETERMINATE_VALUE riscv_dwarf_poly_indeterminate_value
+#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
+#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE riscv_preferred_simd_mode
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-riscv.h"
@@ -254,3 +254,43 @@ Enum(isa_spec_class) String(20191213) Value(ISA_SPEC_CLASS_20191213)
misa-spec=
Target RejectNegative Joined Enum(isa_spec_class) Var(riscv_isa_spec) Init(TARGET_DEFAULT_ISA_SPEC)
Set the version of RISC-V ISA spec.
+
+Enum
+Name(riscv_autovec_preference) Type(enum riscv_autovec_preference_enum)
+The RISC-V auto-vectorization preference:
+
+EnumValue
+Enum(riscv_autovec_preference) String(none) Value(NO_AUTOVEC)
+
+EnumValue
+Enum(riscv_autovec_preference) String(scalable) Value(RVV_SCALABLE)
+
+EnumValue
+Enum(riscv_autovec_preference) String(fixed-vlmin) Value(RVV_FIXED_VLMIN)
+
+EnumValue
+Enum(riscv_autovec_preference) String(fixed-vlmax) Value(RVV_FIXED_VLMAX)
+
+-param=riscv-autovec-preference=
+Target RejectNegative Joined Enum(riscv_autovec_preference) Var(riscv_autovec_preference) Init(NO_AUTOVEC)
+-param=riscv-autovec-preference=<string> Set the preference of auto-vectorization in the RISC-V port.
+
+Enum
+Name(riscv_autovec_lmul) Type(enum riscv_autovec_lmul_enum)
+The possible RVV LMUL values:
+
+EnumValue
+Enum(riscv_autovec_lmul) String(m1) Value(RVV_M1)
+
+EnumValue
+Enum(riscv_autovec_lmul) String(m2) Value(RVV_M2)
+
+EnumValue
+Enum(riscv_autovec_lmul) String(m4) Value(RVV_M4)
+
+EnumValue
+Enum(riscv_autovec_lmul) String(m8) Value(RVV_M8)
+
+-param=riscv-autovec-lmul=
+Target RejectNegative Joined Enum(riscv_autovec_lmul) Var(riscv_autovec_lmul) Init(RVV_M1)
+-param=riscv-autovec-lmul=<string> Set the RVV LMUL of auto-vectorization in the RISC-V port.
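+
+; A sketch of combined usage (hypothetical command line):
+;   gcc -march=rv64gcv -O3 --param=riscv-autovec-preference=scalable \
+;       --param=riscv-autovec-lmul=m2 test.c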
@@ -23,7 +23,7 @@
;; This file include :
;;
;; - Intrinsics (https://github.com/riscv/rvv-intrinsic-doc)
-;; - Auto-vectorization (TBD)
+;; - Auto-vectorization (autovec.md)
;; - Combine optimization (TBD)
(include "vector-iterators.md")
@@ -2015,7 +2015,7 @@
riscv_vector::neg_simm5_p (operands[4]),
[] (rtx *operands, rtx boardcast_scalar) {
emit_insn (gen_pred_sub<mode> (operands[0], operands[1],
- operands[2], operands[3], boardcast_scalar, operands[5],
+ operands[2], boardcast_scalar, operands[3], operands[5],
operands[6], operands[7], operands[8]));
}))
DONE;
@@ -7688,3 +7688,5 @@
"vle<sew>ff.v\t%0,%3%p1"
[(set_attr "type" "vldff")
(set_attr "mode" "<MODE>")])
+
+(include "autovec.md")