@@ -36,16 +36,479 @@ along with GCC; see the file COPYING3. If not see
#include "fold-const.h"
#include "tm_p.h"
#include "tree-vectorizer.h"
+#include "gimple-iterator.h"
+#include "bitmap.h"
+#include "ssa.h"
+#include "backend.h"
/* This file should be included last. */
#include "riscv-vector-costs.h"
namespace riscv_vector {
+/* Dynamic LMUL philosophy - Local linear-scan SSA live range based analysis
+ to determine LMUL
+
+ - Collect all vectorize STMTs locally for each loop block.
+ - Build program point based graph, ignore non-vectorize STMTs:
+
+ vectorize STMT 0 - point 0
+ scalar STMT 0 - ignore.
+ vectorize STMT 1 - point 1
+ ...
+ - Compute the number of V_REGs live at each program point
+ - Determine LMUL in VECTOR COST model according to the program point
+ which has maximum live V_REGs.
+
+ Note:
+
+ - BIGGEST_MODE is the biggest LMUL auto-vectorization element mode.
+ It's important for mixed size auto-vectorization (Conversions, ... etc).
+ E.g. For a loop that is vectorizing conversion of INT32 -> INT64.
+ The biggest mode is DImode and LMUL = 8, LMUL = 4 for SImode.
+ We compute the number live V_REGs at each program point according to
+ this information.
+ - We only compute program points and live ranges locally (within a block)
+ since we just need to compute the number of live V_REGs at each program
+ point and we are not really allocating the registers for each SSA.
+ A variable that is live out of / live into another block simply gets
+ another local live range in that block. Such an approach doesn't affect
+ the accuracy of our live range analysis.
+ - The current analysis doesn't consider any instruction scheduling which
+ may improve the register pressure. So we are conservatively doing the
+ analysis which may end up with a smaller LMUL.
+ TODO: Maybe we could support a reasonable live range shrink algorithm
+ which takes advantage of instruction scheduling.
+ - We may have these following possible autovec modes analysis:
+
+ 1. M8 -> M4 -> M2 -> M1 (stop analysis here) -> MF2 -> MF4 -> MF8
+ 2. M8 -> M1(M4) -> MF2(M2) -> MF4(M1) (stop analysis here) -> MF8(MF2)
+ 3. M1(M8) -> MF2(M4) -> MF4(M2) -> MF8(M1)
+*/
+/* Per-loop dynamic LMUL state, keyed by the loop being vectorized.  The map
+   is file-static, so an entry persists across the successive calls to
+   costs::prefer_new_lmul_p made for the same loop.  */
+static hash_map<class loop *, autovec_info> loop_autovec_infos;
+
+/* Return the index of the last entry in LIVE_RANGES that describes VAR,
+   or -1 when VAR has no entry.  */
+static int
+get_last_live_range (const vec<var_live_range> &live_ranges, tree var)
+{
+  /* Walk from the back so the most recently pushed range wins.  */
+  for (int idx = (int) live_ranges.length () - 1; idx >= 0; idx--)
+    if (live_ranges[idx].var == var)
+      return idx;
+  return -1;
+}
+
+/* For every basic block of the loop described by VINFO, collect the
+   assignment and call statements whose statement-to-vectorize has a
+   STMT_VINFO_TYPE other than undef_vec_info_type, and number them from 0
+   within the block.  The per-block vectors are stored in
+   PROGRAM_POINTS_MAP, keyed by block.  Nothing is done when VINFO is not a
+   loop_vec_info.  */
+static void
+compute_local_program_points (
+  vec_info *vinfo, hash_map<basic_block, vec<stmt_point>> &program_points_map)
+{
+  loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
+  if (!loop_vinfo)
+    return;
+
+  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
+  const unsigned int nbbs = LOOP_VINFO_LOOP (loop_vinfo)->num_nodes;
+  for (unsigned int bb_idx = 0; bb_idx < nbbs; bb_idx++)
+    {
+      basic_block bb = bbs[bb_idx];
+      vec<stmt_point> program_points = vNULL;
+      /* Program points restart at 0 in every block.  */
+      int next_point = 0;
+      if (dump_enabled_p ())
+        dump_printf_loc (MSG_NOTE, vect_location,
+                         "Compute local program points for bb %d:\n",
+                         bb->index);
+      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+           gsi_next (&gsi))
+        {
+          gimple *stmt = gsi_stmt (gsi);
+          /* Only assignments and calls are candidates.  */
+          if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
+            continue;
+          stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
+          /* Statements without a vectorization type are ignored and do not
+             consume a program point.  */
+          if (STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info))
+              == undef_vec_info_type)
+            continue;
+          stmt_point info = {next_point, stmt};
+          program_points.safe_push (info);
+          next_point++;
+          if (dump_enabled_p ())
+            dump_printf_loc (MSG_NOTE, vect_location,
+                             "program point %d: %G", info.point, stmt);
+        }
+      program_points_map.put (bb, program_points);
+    }
+}
+
+/* Build block-local live ranges from PROGRAM_POINTS_MAP and store them in
+   LIVE_RANGES_MAP, keyed by basic block.
+
+   For each recorded statement that is not a store and has an LHS, a new
+   range [point, point] is pushed for the LHS.  For each register operand
+   of non-pointer type, the end of its most recent range is moved to the
+   current point; an operand with no range yet in this block gets
+   [0, point].
+
+   Return the mode with the largest size among all definitions and
+   operands seen (QImode when nothing larger was seen).  */
+static machine_mode
+compute_local_live_ranges (
+  const hash_map<basic_block, vec<stmt_point>> &program_points_map,
+  hash_map<basic_block, vec<var_live_range>> &live_ranges_map)
+{
+  machine_mode biggest_mode = QImode;
+  if (!program_points_map.is_empty ())
+    {
+      unsigned int i;
+      for (hash_map<basic_block, vec<stmt_point>>::iterator iter
+           = program_points_map.begin ();
+           iter != program_points_map.end (); ++iter)
+        {
+          basic_block bb = (*iter).first;
+          vec<stmt_point> program_points = (*iter).second;
+          vec<var_live_range> live_ranges = vNULL;
+          if (dump_enabled_p ())
+            dump_printf_loc (MSG_NOTE, vect_location,
+                             "Compute local live ranges for bb %d:\n",
+                             bb->index);
+          for (const auto program_point : program_points)
+            {
+              int point = program_point.point;
+              gimple *stmt = program_point.stmt;
+              if (!gimple_store_p (stmt))
+                {
+                  /* A call may have no LHS at all; such a statement
+                     defines nothing, so there is no range to start and
+                     no type to inspect.  */
+                  tree lhs = gimple_get_lhs (stmt);
+                  if (lhs != NULL_TREE)
+                    {
+                      machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
+                      if (GET_MODE_SIZE (mode).to_constant ()
+                          > GET_MODE_SIZE (biggest_mode).to_constant ())
+                        biggest_mode = mode;
+                      var_live_range range = {lhs, point, point};
+                      live_ranges.safe_push (range);
+                    }
+                }
+              for (i = 0; i < gimple_num_args (stmt); i++)
+                {
+                  tree var = gimple_arg (stmt, i);
+                  if (is_gimple_reg (var) && !POINTER_TYPE_P (TREE_TYPE (var)))
+                    {
+                      machine_mode mode = TYPE_MODE (TREE_TYPE (var));
+                      if (GET_MODE_SIZE (mode).to_constant ()
+                          > GET_MODE_SIZE (biggest_mode).to_constant ())
+                        biggest_mode = mode;
+                      int index = get_last_live_range (live_ranges, var);
+                      if (index == -1)
+                        {
+                          /* First seen as an operand: treat it as live
+                             from the start of the block.  */
+                          var_live_range range = {var, 0, point};
+                          live_ranges.safe_push (range);
+                        }
+                      else
+                        live_ranges[index].end = point;
+                    }
+                }
+            }
+          live_ranges_map.put (bb, live_ranges);
+          if (dump_enabled_p ())
+            for (i = 0; i < live_ranges.length (); i++)
+              dump_printf_loc (MSG_NOTE, vect_location,
+                               "%T: type = %T, start = %d, end = %d\n",
+                               live_ranges[i].var,
+                               TREE_TYPE (live_ranges[i].var),
+                               live_ranges[i].start, live_ranges[i].end);
+        }
+    }
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location, "Biggest mode = %s\n",
+                     GET_MODE_NAME (biggest_mode));
+  return biggest_mode;
+}
+
+/* Return the number of vector registers a value of scalar mode MODE
+   occupies when the element mode BIGGEST_MODE is vectorized with LMUL.
+   A mode that is RATIO times narrower than BIGGEST_MODE uses LMUL / RATIO
+   registers.  The result is never smaller than 1: when LMUL / RATIO is
+   fractional the value still occupies one whole vector register, so
+   letting the integer division truncate to 0 would make such values
+   disappear from the register pressure estimate.  */
+static unsigned int
+compute_nregs_for_mode (machine_mode mode, machine_mode biggest_mode, int lmul)
+{
+  unsigned int mode_size = GET_MODE_SIZE (mode).to_constant ();
+  unsigned int biggest_size = GET_MODE_SIZE (biggest_mode).to_constant ();
+  gcc_assert (biggest_size >= mode_size);
+  unsigned int ratio = biggest_size / mode_size;
+  return std::max (lmul / ratio, 1U);
+}
+
+/* Return true if LIVE_RANGE1 and LIVE_RANGE2 overlap.  Ranges that merely
+   touch (one ends at the point where the other starts) do not conflict.  */
+static bool
+live_range_conflict_p (const var_live_range &live_range1,
+                       const var_live_range &live_range2)
+{
+  return (live_range1.start < live_range2.end
+          && live_range2.start < live_range1.end);
+}
+
+/* Return the estimated maximum number of vector registers live at any
+   program point of BB, given the block-local LIVE_RANGES, the biggest
+   element mode BIGGEST_MODE and the candidate LMUL.
+
+   Each variable live at a point contributes compute_nregs_for_mode
+   registers.  On top of the local maximum, every SSA name whose type
+   carries the vector type attribute or has a VLS mode, whose definition is
+   either outside any block or in a block from get_all_predecessors (BB),
+   and which has a non-debug use in a block from get_all_successors (BB),
+   contributes riscv_get_v_regno_alignment of its mode.  */
+static unsigned int
+max_number_of_live_regs (const basic_block bb,
+                         const vec<var_live_range> &live_ranges,
+                         machine_mode biggest_mode, int lmul)
+{
+  unsigned int max_nregs = 0;
+  unsigned int live_point = 0;
+  const unsigned int nranges = live_ranges.length ();
+
+  for (unsigned int cur = 0; cur < nranges; cur++)
+    {
+      /* Gather the current range plus everything overlapping it, and the
+         span of program points those ranges cover.  */
+      var_live_range live_range = live_ranges[cur];
+      auto_vec<var_live_range> conflicts;
+      conflicts.safe_push (live_range);
+      unsigned int first_point = live_range.start;
+      unsigned int last_point = live_range.end;
+      for (unsigned int other = 0; other < nranges; other++)
+        {
+          if (other == cur
+              || !live_range_conflict_p (live_range, live_ranges[other]))
+            continue;
+          conflicts.safe_push (live_ranges[other]);
+          first_point
+            = std::min (first_point, (unsigned int) live_ranges[other].start);
+          last_point
+            = std::max (last_point, (unsigned int) live_ranges[other].end);
+        }
+
+      /* Sum the register demand at every point of that span.  */
+      for (unsigned int point = first_point; point <= last_point; point++)
+        {
+          unsigned int nregs = 0;
+          for (unsigned int c = 0; c < conflicts.length (); c++)
+            {
+              const var_live_range &range = conflicts[c];
+              if (point < (unsigned int) range.start
+                  || point > (unsigned int) range.end)
+                continue;
+              machine_mode mode = TYPE_MODE (TREE_TYPE (range.var));
+              nregs += compute_nregs_for_mode (mode, biggest_mode, lmul);
+            }
+          if (nregs > max_nregs)
+            {
+              max_nregs = nregs;
+              live_point = point;
+            }
+        }
+    }
+
+  /* Collect user explicit RVV type.  */
+  hash_set<basic_block> all_preds = get_all_predecessors (bb);
+  hash_set<basic_block> all_succs = get_all_successors (bb);
+  for (unsigned int version = 0;
+       version < cfun->gimple_df->ssa_names->length (); version++)
+    {
+      tree name = ssa_name (version);
+      if (!name)
+        continue;
+      machine_mode mode = TYPE_MODE (TREE_TYPE (name));
+      if (!lookup_vector_type_attribute (TREE_TYPE (name))
+          && !riscv_v_ext_vls_mode_p (mode))
+        continue;
+
+      /* Skip names defined in a block that is not among the predecessors.  */
+      gimple *def = SSA_NAME_DEF_STMT (name);
+      if (gimple_bb (def) && !all_preds.contains (gimple_bb (def)))
+        continue;
+
+      const ssa_use_operand_t *const head = &(SSA_NAME_IMM_USE_NODE (name));
+      for (const ssa_use_operand_t *use = head->next; use != head;
+           use = use->next)
+        {
+          if (!USE_STMT (use) || is_gimple_debug (USE_STMT (use)))
+            continue;
+          if (!all_succs.contains (gimple_bb (USE_STMT (use))))
+            continue;
+
+          /* One qualifying use is enough; count the name once.  */
+          int regno_alignment = riscv_get_v_regno_alignment (mode);
+          max_nregs += regno_alignment;
+          if (dump_enabled_p ())
+            dump_printf_loc (
+              MSG_NOTE, vect_location,
+              "Explicit used SSA %T, vectype = %T, mode = %s, cause %d "
+              "V_REG live in bb %d at program point %d\n",
+              name, TREE_TYPE (name), GET_MODE_NAME (mode), regno_alignment,
+              bb->index, live_point);
+          break;
+        }
+    }
+
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+                     "Maximum lmul = %d, %d number of live V_REG at program "
+                     "point %d for bb %d\n",
+                     lmul, max_nregs, live_point, bb->index);
+  return max_nregs;
+}
+
+/* Return the LMUL currently under evaluation for LOOP.  LOOP must already
+   have an entry in loop_autovec_infos.  */
+static int
+compute_lmul (class loop *loop)
+{
+  return loop_autovec_infos.get (loop)->current_lmul;
+}
+
+/* Adjust local live ranges for values flowing into loop PHIs.
+
+   For every PHI in the loop of VINFO whose statement-to-vectorize has a
+   STMT_VINFO_TYPE other than undef_vec_info_type, look at each PHI
+   argument: in the block the argument arrives from, the end of every live
+   range recorded for that argument in LIVE_RANGES_MAP is moved to the
+   last program point recorded for that block in PROGRAM_POINTS_MAP.
+   Source blocks without recorded program points or live ranges are
+   skipped.  Nothing is done when VINFO is not a loop_vec_info.  */
+static void
+update_local_live_ranges (
+  vec_info *vinfo, hash_map<basic_block, vec<stmt_point>> &program_points_map,
+  hash_map<basic_block, vec<var_live_range>> &live_ranges_map)
+{
+  loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
+  if (!loop_vinfo)
+    return;
+
+  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
+  unsigned int nbbs = loop->num_nodes;
+  unsigned int i, j, k;
+  gphi_iterator psi;
+  for (i = 0; i < nbbs; i++)
+    {
+      basic_block bb = bbs[i];
+      if (dump_enabled_p ())
+        dump_printf_loc (MSG_NOTE, vect_location,
+                         "Update local program points for bb %d:\n",
+                         bb->index);
+      for (psi = gsi_start_phis (bbs[i]); !gsi_end_p (psi); gsi_next (&psi))
+        {
+          gphi *phi = psi.phi ();
+          stmt_vec_info stmt_info = vinfo->lookup_stmt (phi);
+          if (STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info))
+              == undef_vec_info_type)
+            continue;
+
+          for (j = 0; j < gimple_phi_num_args (phi); j++)
+            {
+              edge e = gimple_phi_arg_edge (phi, j);
+              tree def = gimple_phi_arg_def (phi, j);
+              auto *program_points = program_points_map.get (e->src);
+              auto *live_ranges = live_ranges_map.get (e->src);
+              /* Either lookup may fail for a block that was never
+                 analyzed; there is nothing to update in that case.  */
+              if (!program_points || !live_ranges)
+                continue;
+              int max_point = (*program_points).length () - 1;
+              /* Index with K, the position inside the source block's own
+                 range vector; the block counter I is unrelated to it.  */
+              for (k = 0; k < (*live_ranges).length (); k++)
+                {
+                  if ((*live_ranges)[k].var != def)
+                    continue;
+                  int end = (*live_ranges)[k].end;
+                  (*live_ranges)[k].end = max_point;
+                  if (dump_enabled_p ())
+                    dump_printf_loc (
+                      MSG_NOTE, vect_location,
+                      "Update %T end point from %d to %d:\n",
+                      (*live_ranges)[k].var, end,
+                      (*live_ranges)[k].end);
+                }
+            }
+        }
+    }
+}
+
/* Construct the cost object by forwarding VINFO and COSTING_FOR_SCALAR to
   the vector_costs base class; the constructor body itself is empty.  */
costs::costs (vec_info *vinfo, bool costing_for_scalar)
: vector_costs (vinfo, costing_for_scalar)
{}
+/* Dynamic LMUL comparison between this costing and UNCAST_OTHER.
+
+   The first call for a loop records a starting LMUL in loop_autovec_infos;
+   every later call halves the current LMUL, until the entry is marked
+   finished, after which the function returns false immediately.  The
+   statements of OTHER's loop are then turned into block-local live ranges
+   and the maximum number of live vector registers at the current LMUL is
+   estimated.  The result is true only while the current LMUL is above
+   RVV_M1 and that estimate exceeds V_REG_NUM.  When no live range was
+   found at all, the result is whether the current LMUL is above RVV_M1.  */
+bool
+costs::prefer_new_lmul_p (const vector_costs *uncast_other) const
+{
+  auto other = static_cast<const costs *> (uncast_other);
+  auto this_loop_vinfo = as_a<loop_vec_info> (this->m_vinfo);
+  auto other_loop_vinfo = as_a<loop_vec_info> (other->m_vinfo);
+  class loop *loop = LOOP_VINFO_LOOP (this_loop_vinfo);
+
+  if (!LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (this_loop_vinfo)
+      && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (other_loop_vinfo))
+    return false;
+
+  if (autovec_info *known = loop_autovec_infos.get (loop))
+    {
+      /* The search for this loop is over; keep what we have.  */
+      if (known->end_p)
+        return false;
+      /* Otherwise try the next smaller LMUL.  */
+      known->current_lmul = known->current_lmul / 2;
+    }
+  else
+    {
+      /* First query for this loop: record the starting LMUL.  */
+      int regno_alignment
+        = riscv_get_v_regno_alignment (other_loop_vinfo->vector_mode);
+      if (known_eq (LOOP_VINFO_SLP_UNROLLING_FACTOR (other_loop_vinfo), 1U))
+        regno_alignment = RVV_M8;
+      loop_autovec_infos.put (loop, {regno_alignment, regno_alignment, false});
+    }
+
+  int lmul = compute_lmul (loop);
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+                     "Comparing two main loops (%s at VF %d vs %s at VF %d)\n",
+                     GET_MODE_NAME (this_loop_vinfo->vector_mode),
+                     vect_vf_for_cost (this_loop_vinfo),
+                     GET_MODE_NAME (other_loop_vinfo->vector_mode),
+                     vect_vf_for_cost (other_loop_vinfo));
+
+  /* Number the vectorized statements of each block, derive the local live
+     ranges from them, then stretch the ranges feeding PHIs.  */
+  hash_map<basic_block, vec<stmt_point>> program_points_map;
+  compute_local_program_points (other->m_vinfo, program_points_map);
+
+  hash_map<basic_block, vec<var_live_range>> live_ranges_map;
+  machine_mode biggest_mode
+    = compute_local_live_ranges (program_points_map, live_ranges_map);
+
+  update_local_live_ranges (other->m_vinfo, program_points_map,
+                            live_ranges_map);
+
+  /* TODO: We calculate the maximum live vars base on current STMTS
+     sequence.  We can support live range shrink if it can give us
+     big improvement in the future.  */
+
+  /* The program points are not needed beyond this point; free them.  */
+  if (!program_points_map.is_empty ())
+    {
+      for (auto iter = program_points_map.begin ();
+           iter != program_points_map.end (); ++iter)
+        {
+          vec<stmt_point> points = (*iter).second;
+          if (!points.is_empty ())
+            points.release ();
+        }
+      program_points_map.empty ();
+    }
+
+  if (live_ranges_map.is_empty ())
+    return lmul > RVV_M1;
+
+  /* We prefer larger LMUL unless it causes register spillings.  */
+  unsigned int max_nregs = 0;
+  for (auto iter = live_ranges_map.begin (); iter != live_ranges_map.end ();
+       ++iter)
+    {
+      basic_block bb = (*iter).first;
+      vec<var_live_range> live_ranges = (*iter).second;
+      if (live_ranges.is_empty ())
+        continue;
+      unsigned int nregs
+        = max_number_of_live_regs (bb, live_ranges, biggest_mode, lmul);
+      max_nregs = std::max (max_nregs, nregs);
+      live_ranges.release ();
+    }
+  live_ranges_map.empty ();
+
+  /* Re-fetch the entry: it may have been inserted above.  */
+  autovec_info *info = loop_autovec_infos.get (loop);
+  const bool too_many_regs = max_nregs > V_REG_NUM;
+  /* Stop searching once the smallest LMUL is reached or the registers
+     suffice.  */
+  if (info->current_lmul == RVV_M1 || !too_many_regs)
+    info->end_p = true;
+  return info->current_lmul > RVV_M1 && too_many_regs;
+}
+
+/* Override of vector_costs::better_main_loop_than_p.  When
+   flag_vect_cost_model is nonzero and riscv_autovec_lmul is RVV_DYNAMIC,
+   the decision is made by prefer_new_lmul_p; in every other case it is
+   delegated to the base class implementation.  */
+bool
+costs::better_main_loop_than_p (const vector_costs *uncast_other) const
+{
+  const bool use_dynamic_lmul
+    = flag_vect_cost_model && riscv_autovec_lmul == RVV_DYNAMIC;
+  if (use_dynamic_lmul)
+    return prefer_new_lmul_p (uncast_other);
+
+  return vector_costs::better_main_loop_than_p (
+    static_cast<const costs *> (uncast_other));
+}
+
unsigned
costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
stmt_vec_info stmt_info, slp_tree, tree vectype,
@@ -23,6 +23,27 @@
namespace riscv_vector {
+/* A vectorized statement together with its position in the block-local
+   numbering of vectorized statements.  */
+struct stmt_point
+{
+ /* Program point: index of STMT among the vectorized statements of its
+    basic block, counted from 0.  */
+ int point;
+ /* The statement occupying this program point.  */
+ gimple *stmt;
+};
+
+/* Live range of one variable inside a single basic block, expressed in
+   that block's program points.  */
+struct var_live_range
+{
+ /* The variable this range describes.  */
+ tree var;
+ /* First program point of the range: the point of the defining statement,
+    or 0 when the variable is first seen as an operand.  */
+ int start;
+ /* Last program point at which the variable is considered live.  */
+ int end;
+};
+
+/* LMUL search state kept per loop while the dynamic LMUL cost model is
+   queried repeatedly for that loop.  */
+struct autovec_info
+{
+ /* LMUL recorded when the loop was queried for the first time.  */
+ unsigned int initial_lmul;
+ /* LMUL currently under evaluation; halved on each later query.  */
+ unsigned int current_lmul;
+ /* True once the search is finished; later queries then return false
+    without further analysis.  */
+ bool end_p;
+};
+
/* rvv-specific vector costs. */
class costs : public vector_costs
{
@@ -31,12 +52,16 @@ class costs : public vector_costs
public:
costs (vec_info *, bool);
+ bool better_main_loop_than_p (const vector_costs *other) const override;
+
private:
unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
stmt_vec_info stmt_info, slp_tree node,
tree vectype, int misalign,
vect_cost_model_location where) override;
void finish_cost (const vector_costs *) override;
+
+ bool prefer_new_lmul_p (const vector_costs *) const;
};
} // namespace riscv_vector
@@ -70,7 +70,8 @@ riscv-vsetvl.o: $(srcdir)/config/riscv/riscv-vsetvl.cc \
riscv-vector-costs.o: $(srcdir)/config/riscv/riscv-vector-costs.cc \
$(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TARGET_H) $(FUNCTION_H) \
$(TREE_H) basic-block.h $(RTL_H) gimple.h targhooks.h cfgloop.h \
- fold-const.h $(TM_P_H) tree-vectorizer.h \
+ fold-const.h $(TM_P_H) tree-vectorizer.h gimple-iterator.h bitmap.h \
+ ssa.h backend.h \
$(srcdir)/config/riscv/riscv-vector-costs.h
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
$(srcdir)/config/riscv/riscv-vector-costs.cc
new file mode 100644
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+
+/* Two loops over int32_t arrays: a single-add loop followed by a loop whose
+   body has a long chain of dependent adds and multiplies.  The dg-final
+   directives after the function expect both e32,m8 and e32,m2 in the
+   assembly.  NOTE(review): presumably the first loop keeps m8 and the
+   second drops to m2 - confirm against the vect dump.  */
+void
+foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c,
+ int32_t *__restrict a2, int32_t *__restrict b2, int32_t *__restrict c2,
+ int32_t *__restrict a3, int32_t *__restrict b3, int32_t *__restrict c3,
+ int32_t *__restrict a4, int32_t *__restrict b4, int32_t *__restrict c4,
+ int32_t *__restrict a5, int32_t *__restrict b5, int32_t *__restrict c5,
+ int32_t *__restrict d, int32_t *__restrict d2, int32_t *__restrict d3,
+ int32_t *__restrict d4, int32_t *__restrict d5, int n)
+{
+ for (int i = 0; i < n; i++)
+ a[i] = d5[i] + b[i];
+ for (int i = 0; i < n; i++)
+ {
+ a[i] = b[i] + c[i];
+ b5[i] = b[i] + c[i];
+ a2[i] = b2[i] + c2[i];
+ a3[i] = b3[i] + c3[i];
+ a4[i] = b4[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+ d2[i] = a2[i] + c2[i];
+ d3[i] = a3[i] + c3[i];
+ d4[i] = a4[i] + c4[i];
+ d5[i] = a[i] + a4[i];
+ a[i] = a5[i] + b5[i] + a[i];
+
+ c2[i] = a[i] + c[i];
+ c3[i] = b5[i] * a5[i];
+ c4[i] = a2[i] * a3[i];
+ c5[i] = b5[i] * a2[i];
+ c[i] = a[i] + c3[i];
+ c2[i] = a[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+ a[i] = a[i] + b5[i]
+ + a[i] * a2[i] * a3[i] * a4[i] * a5[i] * c[i] * c2[i] * c3[i]
+ * c4[i] * c5[i] * d[i] * d2[i] * d3[i] * d4[i] * d5[i];
+ }
+}
+
+/* { dg-final { scan-assembler {e32,m2} } } */
+/* { dg-final { scan-assembler {e32,m8} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
new file mode 100644
@@ -0,0 +1,91 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+
+/* One loop over int32_t arrays (a..g5) made of groups of dependent adds and
+   multiplies, ending in a product over all arrays.  The dg-final
+   directives after the function expect e32,m1 in the assembly and one
+   "Maximum lmul" dump line each for 8, 4, 2 and 1.  */
+void
+foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c,
+ int32_t *__restrict a2, int32_t *__restrict b2, int32_t *__restrict c2,
+ int32_t *__restrict a3, int32_t *__restrict b3, int32_t *__restrict c3,
+ int32_t *__restrict a4, int32_t *__restrict b4, int32_t *__restrict c4,
+ int32_t *__restrict a5, int32_t *__restrict b5, int32_t *__restrict c5,
+ int32_t *__restrict d,
+ int32_t *__restrict d2,
+ int32_t *__restrict d3,
+ int32_t *__restrict d4,
+ int32_t *__restrict d5,
+ int32_t *__restrict e,
+ int32_t *__restrict e2,
+ int32_t *__restrict e3,
+ int32_t *__restrict e4,
+ int32_t *__restrict e5,
+ int32_t *__restrict f,
+ int32_t *__restrict f2,
+ int32_t *__restrict f3,
+ int32_t *__restrict f4,
+ int32_t *__restrict f5,
+ int32_t *__restrict g,
+ int32_t *__restrict g2,
+ int32_t *__restrict g3,
+ int32_t *__restrict g4,
+ int32_t *__restrict g5,
+ int n)
+{
+ for (int i = 0; i < n; i++)
+ {
+ a[i] = b[i] + c[i];
+ b5[i] = b[i] + c[i];
+ a2[i] = b2[i] + c2[i];
+ a3[i] = b3[i] + c3[i];
+ a4[i] = b4[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+
+ d2[i] = a2[i] + c2[i];
+ d3[i] = a3[i] + c3[i];
+ d4[i] = a4[i] + c4[i];
+ d5[i] = a[i] + a4[i];
+ a[i] = a5[i] + b5[i] + a[i];
+
+ c2[i] = a[i] + c[i];
+ c3[i] = b5[i] * a5[i];
+ c4[i] = a2[i] * a3[i];
+ c5[i] = b5[i] * a2[i];
+ c[i] = a[i] + c3[i];
+ c2[i] = a[i] + c4[i];
+
+ e[i] = c2[i] + c2[i];
+ e2[i] = c2[i] + d2[i];
+ e3[i] = d3[i] + d3[i];
+ e4[i] = c4[i] + a4[i];
+ e5[i] = a[i] + a4[i];
+ a5[i] = a[i] + a4[i];
+
+ f[i] = e2[i] + c2[i];
+ f2[i] = e2[i] + d2[i];
+ f3[i] = e3[i] + d3[i];
+ f4[i] = e4[i] + a4[i];
+ f5[i] = e[i] + a4[i];
+ f5[i] = e5[i] + a4[i];
+
+ g[i] = f2[i] + c2[i];
+ g2[i] = f2[i] + d2[i];
+ g3[i] = f3[i] + d3[i];
+ g4[i] = f4[i] + a4[i];
+ g5[i] = f[i] + a4[i];
+ g5[i] = f5[i] + a4[i];
+
+ a[i] = a[i] + b5[i] + a[i] * a2[i] * a3[i] * a4[i]
+ * a5[i] * c[i] * c2[i] * c3[i] * c4[i] * c5[i]
+ * d[i] * d2[i] * d3[i] * d4[i] * d5[i]
+ * e[i] * e2[i] * e3[i] * e4[i] * e5[i]
+ * f[i] * f2[i] * f3[i] * f4[i] * f5[i]
+ * g[i] * g2[i] * g3[i] * g4[i] * g5[i];
+ }
+}
+
+/* { dg-final { scan-assembler {e32,m1} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 1" 1 "vect" } } */
new file mode 100644
@@ -0,0 +1,63 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 --param riscv-autovec-preference=scalable -fno-schedule-insns -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+
+/* One loop over int32_t arrays (a..e5), compiled with -fno-schedule-insns
+   per the dg-options above.  The dg-final directives after the function
+   expect e32,m1 in the assembly and one "Maximum lmul" dump line each for
+   8, 4, 2 and 1.  */
+void
+foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c,
+ int32_t *__restrict a2, int32_t *__restrict b2, int32_t *__restrict c2,
+ int32_t *__restrict a3, int32_t *__restrict b3, int32_t *__restrict c3,
+ int32_t *__restrict a4, int32_t *__restrict b4, int32_t *__restrict c4,
+ int32_t *__restrict a5, int32_t *__restrict b5, int32_t *__restrict c5,
+ int32_t *__restrict d,
+ int32_t *__restrict d2,
+ int32_t *__restrict d3,
+ int32_t *__restrict d4,
+ int32_t *__restrict d5,
+ int32_t *__restrict e,
+ int32_t *__restrict e2,
+ int32_t *__restrict e3,
+ int32_t *__restrict e4,
+ int32_t *__restrict e5,
+ int n)
+{
+ for (int i = 0; i < n; i++)
+ {
+ a[i] = b[i] + c[i];
+ b5[i] = b[i] + c[i];
+ a2[i] = b2[i] + c2[i];
+ a3[i] = b3[i] + c3[i];
+ a4[i] = b4[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+ e[i] = a2[i] + c2[i];
+ e2[i] = d2[i] + a2[i];
+ e3[i] = d3[i] + a3[i];
+ e4[i] = d4[i] + a4[i];
+ e5[i] = a[i] + a4[i];
+ d2[i] = a2[i] + c2[i];
+ d3[i] = a3[i] + c3[i];
+ d4[i] = a4[i] + c4[i];
+ d5[i] = a[i] + a4[i];
+ a[i] = a5[i] + b5[i] + a[i];
+
+ c2[i] = a[i] + c[i];
+ c3[i] = b5[i] * a5[i];
+ c4[i] = a2[i] * a3[i];
+ c5[i] = b5[i] * a2[i];
+ c[i] = a[i] + c3[i];
+ c2[i] = a[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+ a[i] = a[i] + b5[i] + a[i] * a2[i] * a3[i] * a4[i]
+ * a5[i] * c[i] * c2[i] * c3[i] * c4[i] * c5[i]
+ * d[i] * d2[i] * d3[i] * d4[i] * d5[i]
+ * e[i] * e2[i] * e3[i] * e4[i] * e5[i];
+ }
+}
+
+/* FIXME: Choosing LMUL = 1 is not optimal; LMUL = 2 would be possible if the instruction scheduler were applied. */
+/* { dg-final { scan-assembler {e32,m1} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 1" 1 "vect" } } */
new file mode 100644
@@ -0,0 +1,91 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+
+/* One loop over int8_t arrays (a..g5) made of groups of dependent adds and
+   multiplies, ending in a product over all arrays.  The dg-final
+   directives after the function expect e8,m1 in the assembly and one
+   "Maximum lmul" dump line each for 8, 4, 2 and 1.  */
+void
+foo (int8_t *__restrict a, int8_t *__restrict b, int8_t *__restrict c,
+ int8_t *__restrict a2, int8_t *__restrict b2, int8_t *__restrict c2,
+ int8_t *__restrict a3, int8_t *__restrict b3, int8_t *__restrict c3,
+ int8_t *__restrict a4, int8_t *__restrict b4, int8_t *__restrict c4,
+ int8_t *__restrict a5, int8_t *__restrict b5, int8_t *__restrict c5,
+ int8_t *__restrict d,
+ int8_t *__restrict d2,
+ int8_t *__restrict d3,
+ int8_t *__restrict d4,
+ int8_t *__restrict d5,
+ int8_t *__restrict e,
+ int8_t *__restrict e2,
+ int8_t *__restrict e3,
+ int8_t *__restrict e4,
+ int8_t *__restrict e5,
+ int8_t *__restrict f,
+ int8_t *__restrict f2,
+ int8_t *__restrict f3,
+ int8_t *__restrict f4,
+ int8_t *__restrict f5,
+ int8_t *__restrict g,
+ int8_t *__restrict g2,
+ int8_t *__restrict g3,
+ int8_t *__restrict g4,
+ int8_t *__restrict g5,
+ int n)
+{
+ for (int i = 0; i < n; i++)
+ {
+ a[i] = b[i] + c[i];
+ b5[i] = b[i] + c[i];
+ a2[i] = b2[i] + c2[i];
+ a3[i] = b3[i] + c3[i];
+ a4[i] = b4[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+
+ d2[i] = a2[i] + c2[i];
+ d3[i] = a3[i] + c3[i];
+ d4[i] = a4[i] + c4[i];
+ d5[i] = a[i] + a4[i];
+ a[i] = a5[i] + b5[i] + a[i];
+
+ c2[i] = a[i] + c[i];
+ c3[i] = b5[i] * a5[i];
+ c4[i] = a2[i] * a3[i];
+ c5[i] = b5[i] * a2[i];
+ c[i] = a[i] + c3[i];
+ c2[i] = a[i] + c4[i];
+
+ e[i] = c2[i] + c2[i];
+ e2[i] = c2[i] + d2[i];
+ e3[i] = d3[i] + d3[i];
+ e4[i] = c4[i] + a4[i];
+ e5[i] = a[i] + a4[i];
+ a5[i] = a[i] + a4[i];
+
+ f[i] = e2[i] + c2[i];
+ f2[i] = e2[i] + d2[i];
+ f3[i] = e3[i] + d3[i];
+ f4[i] = e4[i] + a4[i];
+ f5[i] = e[i] + a4[i];
+ f5[i] = e5[i] + a4[i];
+
+ g[i] = f2[i] + c2[i];
+ g2[i] = f2[i] + d2[i];
+ g3[i] = f3[i] + d3[i];
+ g4[i] = f4[i] + a4[i];
+ g5[i] = f[i] + a4[i];
+ g5[i] = f5[i] + a4[i];
+
+ a[i] = a[i] + b5[i] + a[i] * a2[i] * a3[i] * a4[i]
+ * a5[i] * c[i] * c2[i] * c3[i] * c4[i] * c5[i]
+ * d[i] * d2[i] * d3[i] * d4[i] * d5[i]
+ * e[i] * e2[i] * e3[i] * e4[i] * e5[i]
+ * f[i] * f2[i] * f3[i] * f4[i] * f5[i]
+ * g[i] * g2[i] * g3[i] * g4[i] * g5[i];
+ }
+}
+
+/* { dg-final { scan-assembler {e8,m1} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 1" 1 "vect" } } */
new file mode 100644
@@ -0,0 +1,121 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+
+/* One loop over int32_t arrays (a..ggg5) made of groups of dependent adds
+   and multiplies, ending in a product over all arrays.  The dg-final
+   directives after the function expect e32,m1 in the assembly and one
+   "Maximum lmul" dump line each for 8, 4, 2 and 1.  */
+void
+foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c,
+ int32_t *__restrict a2, int32_t *__restrict b2, int32_t *__restrict c2,
+ int32_t *__restrict a3, int32_t *__restrict b3, int32_t *__restrict c3,
+ int32_t *__restrict a4, int32_t *__restrict b4, int32_t *__restrict c4,
+ int32_t *__restrict a5, int32_t *__restrict b5, int32_t *__restrict c5,
+ int32_t *__restrict d,
+ int32_t *__restrict d2,
+ int32_t *__restrict d3,
+ int32_t *__restrict d4,
+ int32_t *__restrict d5,
+ int32_t *__restrict e,
+ int32_t *__restrict e2,
+ int32_t *__restrict e3,
+ int32_t *__restrict e4,
+ int32_t *__restrict e5,
+ int32_t *__restrict f,
+ int32_t *__restrict f2,
+ int32_t *__restrict f3,
+ int32_t *__restrict f4,
+ int32_t *__restrict f5,
+ int32_t *__restrict g,
+ int32_t *__restrict g2,
+ int32_t *__restrict g3,
+ int32_t *__restrict g4,
+ int32_t *__restrict g5,
+
+ int32_t *__restrict gg,
+ int32_t *__restrict gg2,
+ int32_t *__restrict gg3,
+ int32_t *__restrict gg4,
+ int32_t *__restrict gg5,
+
+ int32_t *__restrict ggg,
+ int32_t *__restrict ggg2,
+ int32_t *__restrict ggg3,
+ int32_t *__restrict ggg4,
+ int32_t *__restrict ggg5,
+ int n)
+{
+ for (int i = 0; i < n; i++)
+ {
+ a[i] = b[i] + c[i];
+ b5[i] = b[i] + c[i];
+ a2[i] = b2[i] + c2[i];
+ a3[i] = b3[i] + c3[i];
+ a4[i] = b4[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+
+ d2[i] = a2[i] + c2[i];
+ d3[i] = a3[i] + c3[i];
+ d4[i] = a4[i] + c4[i];
+ d5[i] = a[i] + a4[i];
+ a[i] = a5[i] + b5[i] + a[i];
+
+ c2[i] = a[i] + c[i];
+ c3[i] = b5[i] * a5[i];
+ c4[i] = a2[i] * a3[i];
+ c5[i] = b5[i] * a2[i];
+ c[i] = a[i] + c3[i];
+ c2[i] = a[i] + c4[i];
+
+ e[i] = c2[i] + c2[i];
+ e2[i] = c2[i] + d2[i];
+ e3[i] = d3[i] + d3[i];
+ e4[i] = c4[i] + a4[i];
+ e5[i] = a[i] + a4[i];
+ a5[i] = a[i] + a4[i];
+
+ f[i] = e2[i] + c2[i];
+ f2[i] = e2[i] + d2[i];
+ f3[i] = e3[i] + d3[i];
+ f4[i] = e4[i] + a4[i];
+ f5[i] = e[i] + a4[i];
+ f5[i] = e5[i] + a4[i];
+
+ g[i] = f2[i] + c2[i];
+ g2[i] = f2[i] + d2[i];
+ g3[i] = f3[i] + d3[i];
+ g4[i] = f4[i] + a4[i];
+ g5[i] = f[i] + a4[i];
+ g5[i] = f5[i] + a4[i];
+
+
+ gg[i] = f2[i] + c2[i];
+ gg2[i] = f2[i] + d2[i];
+ gg3[i] = f3[i] + d3[i];
+ gg4[i] = f4[i] + a4[i];
+ gg5[i] = f[i] + a4[i];
+ gg5[i] = f5[i] + a4[i];
+
+
+ ggg[i] = f2[i] + c2[i];
+ ggg2[i] = f2[i] + d2[i];
+ ggg3[i] = f3[i] + d3[i];
+ ggg4[i] = f4[i] + a4[i];
+ ggg5[i] = f[i] + a4[i];
+ ggg5[i] = f5[i] + a4[i];
+
+ a[i] = a[i] + b5[i] + a[i] * a2[i] * a3[i] * a4[i]
+ * a5[i] * c[i] * c2[i] * c3[i] * c4[i] * c5[i]
+ * d[i] * d2[i] * d3[i] * d4[i] * d5[i]
+ * e[i] * e2[i] * e3[i] * e4[i] * e5[i]
+ * f[i] * f2[i] * f3[i] * f4[i] * f5[i]
+ * g[i] * g2[i] * g3[i] * g4[i] * g5[i]
+ * gg[i] * gg2[i] * gg3[i] * gg4[i] * gg5[i]
+ * ggg[i] * ggg2[i] * ggg3[i] * ggg4[i] * ggg5[i];
+ }
+}
+
+/* { dg-final { scan-assembler {e32,m1} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 1" 1 "vect" } } */
new file mode 100644
@@ -0,0 +1,149 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+
+/* One loop over int32_t arrays (a..ggggg5) made of groups of dependent adds
+   and multiplies, ending in a product over all arrays.  The dg-final
+   directives after the function expect e32,m1 in the assembly and one
+   "Maximum lmul" dump line each for 8, 4, 2 and 1.  */
+void
+foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c,
+ int32_t *__restrict a2, int32_t *__restrict b2, int32_t *__restrict c2,
+ int32_t *__restrict a3, int32_t *__restrict b3, int32_t *__restrict c3,
+ int32_t *__restrict a4, int32_t *__restrict b4, int32_t *__restrict c4,
+ int32_t *__restrict a5, int32_t *__restrict b5, int32_t *__restrict c5,
+ int32_t *__restrict d,
+ int32_t *__restrict d2,
+ int32_t *__restrict d3,
+ int32_t *__restrict d4,
+ int32_t *__restrict d5,
+ int32_t *__restrict e,
+ int32_t *__restrict e2,
+ int32_t *__restrict e3,
+ int32_t *__restrict e4,
+ int32_t *__restrict e5,
+ int32_t *__restrict f,
+ int32_t *__restrict f2,
+ int32_t *__restrict f3,
+ int32_t *__restrict f4,
+ int32_t *__restrict f5,
+ int32_t *__restrict g,
+ int32_t *__restrict g2,
+ int32_t *__restrict g3,
+ int32_t *__restrict g4,
+ int32_t *__restrict g5,
+
+ int32_t *__restrict gg,
+ int32_t *__restrict gg2,
+ int32_t *__restrict gg3,
+ int32_t *__restrict gg4,
+ int32_t *__restrict gg5,
+
+ int32_t *__restrict ggg,
+ int32_t *__restrict ggg2,
+ int32_t *__restrict ggg3,
+ int32_t *__restrict ggg4,
+ int32_t *__restrict ggg5,
+
+ int32_t *__restrict gggg,
+ int32_t *__restrict gggg2,
+ int32_t *__restrict gggg3,
+ int32_t *__restrict gggg4,
+ int32_t *__restrict gggg5,
+
+ int32_t *__restrict ggggg,
+ int32_t *__restrict ggggg2,
+ int32_t *__restrict ggggg3,
+ int32_t *__restrict ggggg4,
+ int32_t *__restrict ggggg5,
+ int n)
+{
+ for (int i = 0; i < n; i++)
+ {
+ a[i] = b[i] + c[i];
+ b5[i] = b[i] + c[i];
+ a2[i] = b2[i] + c2[i];
+ a3[i] = b3[i] + c3[i];
+ a4[i] = b4[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+
+ d2[i] = a2[i] + c2[i];
+ d3[i] = a3[i] + c3[i];
+ d4[i] = a4[i] + c4[i];
+ d5[i] = a[i] + a4[i];
+ a[i] = a5[i] + b5[i] + a[i];
+
+ c2[i] = a[i] + c[i];
+ c3[i] = b5[i] * a5[i];
+ c4[i] = a2[i] * a3[i];
+ c5[i] = b5[i] * a2[i];
+ c[i] = a[i] + c3[i];
+ c2[i] = a[i] + c4[i];
+
+ e[i] = c2[i] + c2[i];
+ e2[i] = c2[i] + d2[i];
+ e3[i] = d3[i] + d3[i];
+ e4[i] = c4[i] + a4[i];
+ e5[i] = a[i] + a4[i];
+ a5[i] = a[i] + a4[i];
+
+ f[i] = e2[i] + c2[i];
+ f2[i] = e2[i] + d2[i];
+ f3[i] = e3[i] + d3[i];
+ f4[i] = e4[i] + a4[i];
+ f5[i] = e[i] + a4[i];
+ f5[i] = e5[i] + a4[i];
+
+ g[i] = f2[i] + c2[i];
+ g2[i] = f2[i] + d2[i];
+ g3[i] = f3[i] + d3[i];
+ g4[i] = f4[i] + a4[i];
+ g5[i] = f[i] + a4[i];
+ g5[i] = f5[i] + a4[i];
+
+
+ gg[i] = f2[i] + c2[i];
+ gg2[i] = f2[i] + d2[i];
+ gg3[i] = f3[i] + d3[i];
+ gg4[i] = f4[i] + a4[i];
+ gg5[i] = f[i] + a4[i];
+ gg5[i] = f5[i] + a4[i];
+
+
+ ggg[i] = f2[i] + c2[i];
+ ggg2[i] = f2[i] + d2[i];
+ ggg3[i] = f3[i] + d3[i];
+ ggg4[i] = f4[i] + a4[i];
+ ggg5[i] = f[i] + a4[i];
+ ggg5[i] = f5[i] + a4[i];
+
+ gggg[i] = f2[i] + c2[i];
+ gggg2[i] = f2[i] + d2[i];
+ gggg3[i] = f3[i] + d3[i];
+ gggg4[i] = f4[i] + a4[i];
+ gggg5[i] = f[i] + a4[i];
+ gggg5[i] = f5[i] + a4[i];
+
+ ggggg[i] = f2[i] + c2[i];
+ ggggg2[i] = f2[i] + d2[i];
+ ggggg3[i] = f3[i] + d3[i];
+ ggggg4[i] = f4[i] + a4[i];
+ ggggg5[i] = f[i] + a4[i];
+ ggggg5[i] = f5[i] + a4[i];
+
+ a[i] = a[i] + b5[i] + a[i] * a2[i] * a3[i] * a4[i]
+ * a5[i] * c[i] * c2[i] * c3[i] * c4[i] * c5[i]
+ * d[i] * d2[i] * d3[i] * d4[i] * d5[i]
+ * e[i] * e2[i] * e3[i] * e4[i] * e5[i]
+ * f[i] * f2[i] * f3[i] * f4[i] * f5[i]
+ * g[i] * g2[i] * g3[i] * g4[i] * g5[i]
+ * gg[i] * gg2[i] * gg3[i] * gg4[i] * gg5[i]
+ * ggg[i] * ggg2[i] * ggg3[i] * ggg4[i] * ggg5[i]
+ * gggg[i] * gggg2[i] * gggg3[i] * gggg4[i] * gggg5[i]
+ * ggggg[i] * ggggg2[i] * ggggg3[i] * ggggg4[i] * ggggg5[i];
+ }
+}
+
+/* { dg-final { scan-assembler {e32,m1} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 1" 1 "vect" } } */
new file mode 100644
@@ -0,0 +1,150 @@
+
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+/* Element-wise int8_t add/multiply chain over 55 restrict-qualified arrays; the final statement multiplies values read from most of them.  NOTE(review): presumably written to keep many vector values live at once - confirm.  */
+void
+foo (int8_t *__restrict a, int8_t *__restrict b, int8_t *__restrict c,
+ int8_t *__restrict a2, int8_t *__restrict b2, int8_t *__restrict c2,
+ int8_t *__restrict a3, int8_t *__restrict b3, int8_t *__restrict c3,
+ int8_t *__restrict a4, int8_t *__restrict b4, int8_t *__restrict c4,
+ int8_t *__restrict a5, int8_t *__restrict b5, int8_t *__restrict c5,
+ int8_t *__restrict d,
+ int8_t *__restrict d2,
+ int8_t *__restrict d3,
+ int8_t *__restrict d4,
+ int8_t *__restrict d5,
+ int8_t *__restrict e,
+ int8_t *__restrict e2,
+ int8_t *__restrict e3,
+ int8_t *__restrict e4,
+ int8_t *__restrict e5,
+ int8_t *__restrict f,
+ int8_t *__restrict f2,
+ int8_t *__restrict f3,
+ int8_t *__restrict f4,
+ int8_t *__restrict f5,
+ int8_t *__restrict g,
+ int8_t *__restrict g2,
+ int8_t *__restrict g3,
+ int8_t *__restrict g4,
+ int8_t *__restrict g5,
+
+ int8_t *__restrict gg,
+ int8_t *__restrict gg2,
+ int8_t *__restrict gg3,
+ int8_t *__restrict gg4,
+ int8_t *__restrict gg5,
+
+ int8_t *__restrict ggg,
+ int8_t *__restrict ggg2,
+ int8_t *__restrict ggg3,
+ int8_t *__restrict ggg4,
+ int8_t *__restrict ggg5,
+
+ int8_t *__restrict gggg,
+ int8_t *__restrict gggg2,
+ int8_t *__restrict gggg3,
+ int8_t *__restrict gggg4,
+ int8_t *__restrict gggg5,
+
+ int8_t *__restrict ggggg,
+ int8_t *__restrict ggggg2,
+ int8_t *__restrict ggggg3,
+ int8_t *__restrict ggggg4,
+ int8_t *__restrict ggggg5,
+ int n)
+{
+ for (int i = 0; i < n; i++)
+ {
+ a[i] = b[i] + c[i];
+ b5[i] = b[i] + c[i];
+ a2[i] = b2[i] + c2[i];
+ a3[i] = b3[i] + c3[i];
+ a4[i] = b4[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+
+ d2[i] = a2[i] + c2[i];
+ d3[i] = a3[i] + c3[i];
+ d4[i] = a4[i] + c4[i];
+ d5[i] = a[i] + a4[i];
+ a[i] = a5[i] + b5[i] + a[i];
+
+ c2[i] = a[i] + c[i];
+ c3[i] = b5[i] * a5[i];
+ c4[i] = a2[i] * a3[i];
+ c5[i] = b5[i] * a2[i];
+ c[i] = a[i] + c3[i];
+ c2[i] = a[i] + c4[i];
+
+ e[i] = c2[i] + c2[i];
+ e2[i] = c2[i] + d2[i];
+ e3[i] = d3[i] + d3[i];
+ e4[i] = c4[i] + a4[i];
+ e5[i] = a[i] + a4[i];
+ a5[i] = a[i] + a4[i];
+
+ f[i] = e2[i] + c2[i];
+ f2[i] = e2[i] + d2[i];
+ f3[i] = e3[i] + d3[i];
+ f4[i] = e4[i] + a4[i];
+ f5[i] = e[i] + a4[i]; /* Overwritten by the next statement; the *5 stores in the groups below follow the same pattern.  */
+ f5[i] = e5[i] + a4[i];
+
+ g[i] = f2[i] + c2[i];
+ g2[i] = f2[i] + d2[i];
+ g3[i] = f3[i] + d3[i];
+ g4[i] = f4[i] + a4[i];
+ g5[i] = f[i] + a4[i];
+ g5[i] = f5[i] + a4[i];
+
+
+ gg[i] = f2[i] + c2[i]; /* The gg, ggg, gggg and ggggg groups use the same right-hand sides as the g group.  */
+ gg2[i] = f2[i] + d2[i];
+ gg3[i] = f3[i] + d3[i];
+ gg4[i] = f4[i] + a4[i];
+ gg5[i] = f[i] + a4[i];
+ gg5[i] = f5[i] + a4[i];
+
+
+ ggg[i] = f2[i] + c2[i];
+ ggg2[i] = f2[i] + d2[i];
+ ggg3[i] = f3[i] + d3[i];
+ ggg4[i] = f4[i] + a4[i];
+ ggg5[i] = f[i] + a4[i];
+ ggg5[i] = f5[i] + a4[i];
+
+ gggg[i] = f2[i] + c2[i];
+ gggg2[i] = f2[i] + d2[i];
+ gggg3[i] = f3[i] + d3[i];
+ gggg4[i] = f4[i] + a4[i];
+ gggg5[i] = f[i] + a4[i];
+ gggg5[i] = f5[i] + a4[i];
+
+ ggggg[i] = f2[i] + c2[i];
+ ggggg2[i] = f2[i] + d2[i];
+ ggggg3[i] = f3[i] + d3[i];
+ ggggg4[i] = f4[i] + a4[i];
+ ggggg5[i] = f[i] + a4[i];
+ ggggg5[i] = f5[i] + a4[i];
+
+ a[i] = a[i] + b5[i] + a[i] * a2[i] * a3[i] * a4[i]
+ * a5[i] * c[i] * c2[i] * c3[i] * c4[i] * c5[i]
+ * d[i] * d2[i] * d3[i] * d4[i] * d5[i]
+ * e[i] * e2[i] * e3[i] * e4[i] * e5[i]
+ * f[i] * f2[i] * f3[i] * f4[i] * f5[i]
+ * g[i] * g2[i] * g3[i] * g4[i] * g5[i]
+ * gg[i] * gg2[i] * gg3[i] * gg4[i] * gg5[i]
+ * ggg[i] * ggg2[i] * ggg3[i] * ggg4[i] * ggg5[i]
+ * gggg[i] * gggg2[i] * gggg3[i] * gggg4[i] * gggg5[i]
+ * ggggg[i] * ggggg2[i] * ggggg3[i] * ggggg4[i] * ggggg5[i];
+ }
+}
+
+/* { dg-final { scan-assembler {e8,m1} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 1" 1 "vect" } } */
new file mode 100644
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 --param riscv-autovec-preference=scalable -Wno-psabi -fdump-tree-vect-details" } */
+
+#include "riscv_vector.h"
+/* int32_t element-wise add/multiply chain over 20 restrict-qualified arrays; the vint32m8_t argument is not referenced by the loop and is returned unchanged.  */
+vint32m8_t
+foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c,
+ int32_t *__restrict a2, int32_t *__restrict b2, int32_t *__restrict c2,
+ int32_t *__restrict a3, int32_t *__restrict b3, int32_t *__restrict c3,
+ int32_t *__restrict a4, int32_t *__restrict b4, int32_t *__restrict c4,
+ int32_t *__restrict a5, int32_t *__restrict b5, int32_t *__restrict c5,
+ int32_t *__restrict d, int32_t *__restrict d2, int32_t *__restrict d3,
+ int32_t *__restrict d4, int32_t *__restrict d5, int n, vint32m8_t vector)
+{
+ for (int i = 0; i < n; i++)
+ {
+ a[i] = b[i] + c[i];
+ b5[i] = b[i] + c[i];
+ a2[i] = b2[i] + c2[i];
+ a3[i] = b3[i] + c3[i];
+ a4[i] = b4[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+ d2[i] = a2[i] + c2[i];
+ d3[i] = a3[i] + c3[i];
+ d4[i] = a4[i] + c4[i];
+ d5[i] = a[i] + a4[i];
+ a[i] = a5[i] + b5[i] + a[i];
+
+ c2[i] = a[i] + c[i];
+ c3[i] = b5[i] * a5[i];
+ c4[i] = a2[i] * a3[i];
+ c5[i] = b5[i] * a2[i];
+ c[i] = a[i] + c3[i];
+ c2[i] = a[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+ a[i] = a[i] + b5[i]
+ + a[i] * a2[i] * a3[i] * a4[i] * a5[i] * c[i] * c2[i] * c3[i]
+ * c4[i] * c5[i] * d[i] * d2[i] * d3[i] * d4[i] * d5[i];
+ }
+ return vector; /* Passed through untouched.  NOTE(review): presumably here so an m8 value stays live across the loop - confirm.  */
+}
+
+/* { dg-final { scan-assembler {e32,m1} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 1" 1 "vect" } } */
new file mode 100644
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+/* int32_t element-wise add/multiply chain over 20 restrict-qualified arrays; the final statement multiplies values read from most of them.  */
+void
+foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c,
+ int32_t *__restrict a2, int32_t *__restrict b2, int32_t *__restrict c2,
+ int32_t *__restrict a3, int32_t *__restrict b3, int32_t *__restrict c3,
+ int32_t *__restrict a4, int32_t *__restrict b4, int32_t *__restrict c4,
+ int32_t *__restrict a5, int32_t *__restrict b5, int32_t *__restrict c5,
+ int32_t *__restrict d,
+ int32_t *__restrict d2,
+ int32_t *__restrict d3,
+ int32_t *__restrict d4,
+ int32_t *__restrict d5,
+ int n)
+{
+ for (int i = 0; i < n; i++)
+ {
+ a[i] = b[i] + c[i];
+ b5[i] = b[i] + c[i];
+ a2[i] = b2[i] + c2[i];
+ a3[i] = b3[i] + c3[i];
+ a4[i] = b4[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+ d2[i] = a2[i] + c2[i];
+ d3[i] = a3[i] + c3[i];
+ d4[i] = a4[i] + c4[i];
+ d5[i] = a[i] + a4[i];
+ a[i] = a5[i] + b5[i] + a[i];
+
+ c2[i] = a[i] + c[i]; /* c2 is stored again below before it is read.  */
+ c3[i] = b5[i] * a5[i];
+ c4[i] = a2[i] * a3[i];
+ c5[i] = b5[i] * a2[i];
+ c[i] = a[i] + c3[i];
+ c2[i] = a[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+ a[i] = a[i] + b5[i] + a[i] * a2[i] * a3[i] * a4[i]
+ * a5[i] * c[i] * c2[i] * c3[i] * c4[i] * c5[i]
+ * d[i] * d2[i] * d3[i] * d4[i] * d5[i];
+ }
+}
+
+/* { dg-final { scan-assembler {e32,m2} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
new file mode 100644
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+/* int8_t element-wise add/multiply chain over 20 restrict-qualified arrays; the final statement multiplies values read from most of them.  */
+void
+foo (int8_t *__restrict a, int8_t *__restrict b, int8_t *__restrict c,
+ int8_t *__restrict a2, int8_t *__restrict b2, int8_t *__restrict c2,
+ int8_t *__restrict a3, int8_t *__restrict b3, int8_t *__restrict c3,
+ int8_t *__restrict a4, int8_t *__restrict b4, int8_t *__restrict c4,
+ int8_t *__restrict a5, int8_t *__restrict b5, int8_t *__restrict c5,
+ int8_t *__restrict d,
+ int8_t *__restrict d2,
+ int8_t *__restrict d3,
+ int8_t *__restrict d4,
+ int8_t *__restrict d5,
+ int n)
+{
+ for (int i = 0; i < n; i++)
+ {
+ a[i] = b[i] + c[i];
+ b5[i] = b[i] + c[i];
+ a2[i] = b2[i] + c2[i];
+ a3[i] = b3[i] + c3[i];
+ a4[i] = b4[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+ d2[i] = a2[i] + c2[i];
+ d3[i] = a3[i] + c3[i];
+ d4[i] = a4[i] + c4[i];
+ d5[i] = a[i] + a4[i];
+ a[i] = a5[i] + b5[i] + a[i];
+
+ c2[i] = a[i] + c[i]; /* c2 is stored again below before it is read.  */
+ c3[i] = b5[i] * a5[i];
+ c4[i] = a2[i] * a3[i];
+ c5[i] = b5[i] * a2[i];
+ c[i] = a[i] + c3[i];
+ c2[i] = a[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+ a[i] = a[i] + b5[i] + a[i] * a2[i] * a3[i] * a4[i]
+ * a5[i] * c[i] * c2[i] * c3[i] * c4[i] * c5[i]
+ * d[i] * d2[i] * d3[i] * d4[i] * d5[i];
+ }
+}
+
+/* { dg-final { scan-assembler {e8,m2} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
new file mode 100644
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+/* int32_t element-wise add/multiply chain over 20 restrict-qualified arrays; the final statement multiplies values read from most of them.  */
+void
+foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c,
+ int32_t *__restrict a2, int32_t *__restrict b2, int32_t *__restrict c2,
+ int32_t *__restrict a3, int32_t *__restrict b3, int32_t *__restrict c3,
+ int32_t *__restrict a4, int32_t *__restrict b4, int32_t *__restrict c4,
+ int32_t *__restrict a5, int32_t *__restrict b5, int32_t *__restrict c5,
+ int32_t *__restrict d,
+ int32_t *__restrict d2,
+ int32_t *__restrict d3,
+ int32_t *__restrict d4,
+ int32_t *__restrict d5,
+ int n)
+{
+ for (int i = 0; i < n; i++)
+ {
+ a[i] = b[i] + c[i];
+ b5[i] = b[i] + c[i];
+ a2[i] = b2[i] + c2[i];
+ a3[i] = b3[i] + c3[i];
+ a4[i] = b4[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+ d2[i] = a2[i] + c2[i];
+ d3[i] = a3[i] + c3[i];
+ d4[i] = a4[i] + c4[i];
+ d5[i] = a[i] + a4[i];
+ a[i] = a5[i] + b5[i] + a[i];
+
+ c2[i] = a[i] + c[i]; /* c2 is stored again below before it is read.  */
+ c3[i] = b5[i] * a5[i];
+ c4[i] = a2[i] * a3[i];
+ c5[i] = b5[i] * a2[i];
+ c[i] = a[i] + c3[i];
+ c2[i] = a[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+ a[i] = a[i] + b5[i] + a[i] * a2[i] * a3[i] * a4[i]
+ * a5[i] * c[i] * c2[i] * c3[i] * c4[i] * c5[i]
+ * d[i] * d2[i] * d3[i] * d4[i] * d5[i];
+ }
+}
+
+/* { dg-final { scan-assembler {e32,m2} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
new file mode 100644
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+#include "riscv_vector.h"
+/* Issues an explicit m1 intrinsic load from a and store to c (vl argument 32), then runs an int32_t element-wise add/multiply chain over 20 restrict-qualified arrays.  */
+void
+foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c,
+ int32_t *__restrict a2, int32_t *__restrict b2, int32_t *__restrict c2,
+ int32_t *__restrict a3, int32_t *__restrict b3, int32_t *__restrict c3,
+ int32_t *__restrict a4, int32_t *__restrict b4, int32_t *__restrict c4,
+ int32_t *__restrict a5, int32_t *__restrict b5, int32_t *__restrict c5,
+ int32_t *__restrict d, int32_t *__restrict d2, int32_t *__restrict d3,
+ int32_t *__restrict d4, int32_t *__restrict d5, int n)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (a, 32); /* Hand-written vector code ahead of the loop that is left to the auto-vectorizer.  */
+ __riscv_vse32_v_i32m1 (c, v, 32);
+ for (int i = 0; i < n; i++)
+ {
+ a[i] = b[i] + c[i];
+ b5[i] = b[i] + c[i];
+ a2[i] = b2[i] + c2[i];
+ a3[i] = b3[i] + c3[i];
+ a4[i] = b4[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+ d2[i] = a2[i] + c2[i];
+ d3[i] = a3[i] + c3[i];
+ d4[i] = a4[i] + c4[i];
+ d5[i] = a[i] + a4[i];
+ a[i] = a5[i] + b5[i] + a[i];
+
+ c2[i] = a[i] + c[i];
+ c3[i] = b5[i] * a5[i];
+ c4[i] = a2[i] * a3[i];
+ c5[i] = b5[i] * a2[i];
+ c[i] = a[i] + c3[i];
+ c2[i] = a[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+ a[i] = a[i] + b5[i]
+ + a[i] * a2[i] * a3[i] * a4[i] * a5[i] * c[i] * c2[i] * c3[i]
+ * c4[i] * c5[i] * d[i] * d2[i] * d3[i] * d4[i] * d5[i];
+ }
+}
+
+/* { dg-final { scan-assembler {e32,m2} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+/* int32_t element-wise loop: four independent sums, then a sum, a difference and products built from those results.  Parameters b5, c5 and m are not referenced.  */
+void
+foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c,
+ int32_t *__restrict a2, int32_t *__restrict b2, int32_t *__restrict c2,
+ int32_t *__restrict a3, int32_t *__restrict b3, int32_t *__restrict c3,
+ int32_t *__restrict a4, int32_t *__restrict b4, int32_t *__restrict c4,
+ int32_t *__restrict a5, int32_t *__restrict b5, int32_t *__restrict c5,
+ int32_t *__restrict d, int32_t *__restrict d2, int32_t *__restrict d3,
+ int32_t *__restrict d4, int32_t *__restrict d5, int n, int m)
+{
+ for (int i = 0; i < n; i++)
+ {
+ a[i] = b[i] + c[i];
+ a2[i] = b2[i] + c2[i];
+ a3[i] = b3[i] + c3[i];
+ a4[i] = b4[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+ d[i] = a[i] - a2[i];
+ d2[i] = a2[i] * a[i];
+ d3[i] = a3[i] * a2[i];
+ d4[i] = a2[i] * d2[i];
+ d5[i] = a[i] * a2[i] * a3[i] * a4[i] * d[i]; /* Reads a, a2, a3, a4 and d together.  */
+ }
+}
+
+/* { dg-final { scan-assembler {e32,m4} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+/* int8_t element-wise loop: four independent sums, then a sum, a difference and products built from those results.  Parameters b5, c5 and m are not referenced.  */
+void
+foo (int8_t *__restrict a, int8_t *__restrict b, int8_t *__restrict c,
+ int8_t *__restrict a2, int8_t *__restrict b2, int8_t *__restrict c2,
+ int8_t *__restrict a3, int8_t *__restrict b3, int8_t *__restrict c3,
+ int8_t *__restrict a4, int8_t *__restrict b4, int8_t *__restrict c4,
+ int8_t *__restrict a5, int8_t *__restrict b5, int8_t *__restrict c5,
+ int8_t *__restrict d, int8_t *__restrict d2, int8_t *__restrict d3,
+ int8_t *__restrict d4, int8_t *__restrict d5, int n, int m)
+{
+ for (int i = 0; i < n; i++)
+ {
+ a[i] = b[i] + c[i];
+ a2[i] = b2[i] + c2[i];
+ a3[i] = b3[i] + c3[i];
+ a4[i] = b4[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+ d[i] = a[i] - a2[i];
+ d2[i] = a2[i] * a[i];
+ d3[i] = a3[i] * a2[i];
+ d4[i] = a2[i] * d2[i];
+ d5[i] = a[i] * a2[i] * a3[i] * a4[i] * d[i]; /* Reads a, a2, a3, a4 and d together.  */
+ }
+}
+
+/* { dg-final { scan-assembler {e8,m4} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
new file mode 100644
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+/* Mixed-width element-wise loop: a and a5 are int64_t arrays, every other array is int8_t.  Parameter c5 is not referenced.  */
+void foo2 (int64_t *__restrict a,
+ int8_t *__restrict b,
+ int8_t *__restrict c,
+ int8_t *__restrict a2,
+ int8_t *__restrict b2,
+ int8_t *__restrict c2,
+ int8_t *__restrict a3,
+ int8_t *__restrict b3,
+ int8_t *__restrict c3,
+ int8_t *__restrict a4,
+ int8_t *__restrict b4,
+ int8_t *__restrict c4,
+ int64_t *__restrict a5,
+ int8_t *__restrict b5,
+ int8_t *__restrict c5,
+ int n)
+{
+ for (int i = 0; i < n; i++){
+ a[i] = b[i] + c[i]; /* int8_t operands stored to an int64_t element.  */
+ b5[i] = b[i] + c[i];
+ a2[i] = b2[i] + c2[i];
+ a3[i] = b3[i] + c3[i];
+ a4[i] = b4[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+ a[i] = a5[i] + b5[i]+ a[i];
+
+ a[i] = a[i] + c[i];
+ b5[i] = a[i] + c[i]; /* int64_t result stored to an int8_t element.  */
+ a2[i] = a[i] + c2[i];
+ a3[i] = a[i] + c3[i];
+ a4[i] = a[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+ a[i] = a[i] + b5[i]+ a[i];
+ }
+}
+
+/* { dg-final { scan-assembler {e64,m4} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
new file mode 100644
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+/* Mixed-width element-wise loop: a and a5 are int64_t arrays, every other array is int32_t.  Parameter c5 is not referenced.  */
+void foo2 (int64_t *__restrict a,
+ int32_t *__restrict b,
+ int32_t *__restrict c,
+ int32_t *__restrict a2,
+ int32_t *__restrict b2,
+ int32_t *__restrict c2,
+ int32_t *__restrict a3,
+ int32_t *__restrict b3,
+ int32_t *__restrict c3,
+ int32_t *__restrict a4,
+ int32_t *__restrict b4,
+ int32_t *__restrict c4,
+ int64_t *__restrict a5,
+ int32_t *__restrict b5,
+ int32_t *__restrict c5,
+ int n)
+{
+ for (int i = 0; i < n; i++){
+ a[i] = b[i] + c[i]; /* int32_t operands stored to an int64_t element.  */
+ b5[i] = b[i] + c[i];
+ a2[i] = b2[i] + c2[i];
+ a3[i] = b3[i] + c3[i];
+ a4[i] = b4[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+ a[i] = a5[i] + b5[i]+ a[i];
+
+ a[i] = a[i] + c[i];
+ b5[i] = a[i] + c[i]; /* int64_t result stored to an int32_t element.  */
+ a2[i] = a[i] + c2[i];
+ a3[i] = a[i] + c3[i];
+ a4[i] = a[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+ a[i] = a[i] + b5[i]+ a[i];
+ }
+}
+
+/* { dg-final { scan-assembler {e64,m4} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
new file mode 100644
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+/* Mixed-width element-wise loop: a and a5 are int16_t arrays, every other array is int32_t.  Parameter c5 is not referenced.  */
+void foo2 (int16_t *__restrict a,
+ int32_t *__restrict b,
+ int32_t *__restrict c,
+ int32_t *__restrict a2,
+ int32_t *__restrict b2,
+ int32_t *__restrict c2,
+ int32_t *__restrict a3,
+ int32_t *__restrict b3,
+ int32_t *__restrict c3,
+ int32_t *__restrict a4,
+ int32_t *__restrict b4,
+ int32_t *__restrict c4,
+ int16_t *__restrict a5,
+ int32_t *__restrict b5,
+ int32_t *__restrict c5,
+ int n)
+{
+ for (int i = 0; i < n; i++){
+ a[i] = b[i] + c[i]; /* int32_t result stored to an int16_t element.  */
+ b5[i] = b[i] + c[i];
+ a2[i] = b2[i] + c2[i];
+ a3[i] = b3[i] + c3[i];
+ a4[i] = b4[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+ a[i] = a5[i] + b5[i]+ a[i];
+
+ a[i] = a[i] + c[i];
+ b5[i] = a[i] + c[i]; /* int16_t and int32_t operands stored to an int32_t element.  */
+ a2[i] = a[i] + c2[i];
+ a3[i] = a[i] + c3[i];
+ a4[i] = a[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+ a[i] = a[i] + b5[i]+ a[i];
+ }
+}
+
+/* { dg-final { scan-assembler {e32,m4} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
new file mode 100644
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 --param riscv-autovec-preference=scalable -fselective-scheduling -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+/* Each iteration fills 8 consecutive bytes of a from the matching 8-byte group of b read in reverse order, adding the constants 1..8.  */
+void
+foo (uint8_t *restrict a, uint8_t *restrict b, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ a[i * 8] = b[i * 8 + 7] + 1; /* Destination offset k pairs with source offset 7 - k.  */
+ a[i * 8 + 1] = b[i * 8 + 6] + 2;
+ a[i * 8 + 2] = b[i * 8 + 5] + 3;
+ a[i * 8 + 3] = b[i * 8 + 4] + 4;
+ a[i * 8 + 4] = b[i * 8 + 3] + 5;
+ a[i * 8 + 5] = b[i * 8 + 2] + 6;
+ a[i * 8 + 6] = b[i * 8 + 1] + 7;
+ a[i * 8 + 7] = b[i * 8 + 0] + 8;
+ }
+}
+
+/* { dg-final { scan-assembler {e8,m4} } } */
+/* { dg-final { scan-assembler-times {csrr} 1 } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 8" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
new file mode 100644
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+/* Mixed-width element-wise loop: a and a5 are int8_t arrays, every other array is int64_t.  Parameter c5 is not referenced.  */
+void foo2 (int8_t *__restrict a,
+ int64_t *__restrict b,
+ int64_t *__restrict c,
+ int64_t *__restrict a2,
+ int64_t *__restrict b2,
+ int64_t *__restrict c2,
+ int64_t *__restrict a3,
+ int64_t *__restrict b3,
+ int64_t *__restrict c3,
+ int64_t *__restrict a4,
+ int64_t *__restrict b4,
+ int64_t *__restrict c4,
+ int8_t *__restrict a5,
+ int64_t *__restrict b5,
+ int64_t *__restrict c5,
+ int n)
+{
+ for (int i = 0; i < n; i++){
+ a[i] = b[i] + c[i]; /* int64_t result stored to an int8_t element.  */
+ b5[i] = b[i] + c[i];
+ a2[i] = b2[i] + c2[i];
+ a3[i] = b3[i] + c3[i];
+ a4[i] = b4[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+ a[i] = a5[i] + b5[i]+ a[i];
+
+ a[i] = a[i] + c[i];
+ b5[i] = a[i] + c[i]; /* int8_t and int64_t operands stored to an int64_t element.  */
+ a2[i] = a[i] + c2[i];
+ a3[i] = a[i] + c3[i];
+ a4[i] = a[i] + c4[i];
+ a5[i] = a[i] + a4[i];
+ a[i] = a[i] + b5[i]+ a[i];
+ }
+}
+
+/* { dg-final { scan-assembler {e64,m4} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
new file mode 100644
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 --param riscv-autovec-preference=scalable -fselective-scheduling -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+/* Each iteration fills 16 consecutive bytes of a from the matching 16-byte group of b read in reverse order; the added constants 1..8 are used once per half.  */
+void
+foo (uint8_t *restrict a, uint8_t *restrict b, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ a[i * 16] = b[i * 16 + 15] + 1; /* Destination offset k pairs with source offset 15 - k.  */
+ a[i * 16 + 1] = b[i * 16 + 14] + 2;
+ a[i * 16 + 2] = b[i * 16 + 13] + 3;
+ a[i * 16 + 3] = b[i * 16 + 12] + 4;
+ a[i * 16 + 4] = b[i * 16 + 11] + 5;
+ a[i * 16 + 5] = b[i * 16 + 10] + 6;
+ a[i * 16 + 6] = b[i * 16 + 9] + 7;
+ a[i * 16 + 7] = b[i * 16 + 8] + 8;
+
+ a[i * 16 + 8] = b[i * 16 + 7] + 1; /* Second half restarts the added constant at 1.  */
+ a[i * 16 + 9] = b[i * 16 + 6] + 2;
+ a[i * 16 + 10] = b[i * 16 + 5] + 3;
+ a[i * 16 + 11] = b[i * 16 + 4] + 4;
+ a[i * 16 + 12] = b[i * 16 + 3] + 5;
+ a[i * 16 + 13] = b[i * 16 + 2] + 6;
+ a[i * 16 + 14] = b[i * 16 + 1] + 7;
+ a[i * 16 + 15] = b[i * 16 + 0] + 8;
+ }
+}
+
+/* { dg-final { scan-assembler {e8,m4} } } */
+/* { dg-final { scan-assembler-times {csrr} 1 } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 8" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+/* Adds b[i] into a[i] for every i in [0, n); both arrays hold int32_t elements.  */
+void
+foo (int32_t *__restrict a, int32_t *__restrict b, int n)
+{
+ for (int i = 0; i < n; i++)
+ a[i] = a[i] + b[i];
+}
+
+/* { dg-final { scan-assembler {e32,m8} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 4" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
new file mode 100644
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+/* Adds the first 2 * n elements of x to res, two adjacent elements per iteration, and returns the total.  */
+int
+foo (int *x, int n, int res)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ res += x[i * 2]; /* Even-indexed element of the pair.  */
+ res += x[i * 2 + 1]; /* Odd-indexed element of the pair.  */
+ }
+ return res;
+}
+
+/* { dg-final { scan-assembler {e32,m8} } } */
+/* { dg-final { scan-assembler-times {csrr} 1 } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 4" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+/* Adds int16_t b[i] into int32_t a[i] for every i in [0, n), so the loop mixes two element widths.  */
+void
+foo (int32_t *__restrict a, int16_t *__restrict b, int n)
+{
+ for (int i = 0; i < n; i++)
+ a[i] = a[i] + b[i];
+}
+
+/* { dg-final { scan-assembler {e32,m8} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 4" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+/* Adds b[i] into a[i] for every i in [0, n); both arrays hold int8_t elements.  */
+void
+foo (int8_t *__restrict a, int8_t *__restrict b, int n)
+{
+ for (int i = 0; i < n; i++)
+ a[i] = a[i] + b[i];
+}
+
+/* { dg-final { scan-assembler {e8,m8} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 4" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
new file mode 100644
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+#include <stddef.h>
+/* Adds b[i] into a[i] for every i in [0, n); both arrays hold size_t elements.  */
+void
+foo (size_t *__restrict a, size_t *__restrict b, int n)
+{
+ for (int i = 0; i < n; i++)
+ a[i] = a[i] + b[i];
+}
+
+/* { dg-final { scan-assembler {e64,m8} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 4" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+/* Applies a[i] = a[i] + b[i] seven times in a row per iteration, on int8_t elements.  */
+void
+foo (int8_t *__restrict a, int8_t *__restrict b, int n)
+{
+ for (int i = 0; i < n; i++){
+ a[i] = a[i] + b[i]; /* Each following statement reads the a[i] stored by the previous one.  */
+ a[i] = a[i] + b[i];
+ a[i] = a[i] + b[i];
+ a[i] = a[i] + b[i];
+ a[i] = a[i] + b[i];
+ a[i] = a[i] + b[i];
+ a[i] = a[i] + b[i];
+ }
+}
+
+/* { dg-final { scan-assembler {e8,m8} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 4" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
new file mode 100644
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+/* Stores into a[i] the product of the nine int8_t inputs b, a2, b2, a3, b3, a4, b4, a5 and b5 at index i.  */
+void
+foo (int8_t *__restrict a, int8_t *__restrict b, int8_t *__restrict a2,
+ int8_t *__restrict b2, int8_t *__restrict a3, int8_t *__restrict b3,
+ int8_t *__restrict a4, int8_t *__restrict b4, int8_t *__restrict a5,
+ int8_t *__restrict b5, int n)
+{
+ for (int i = 0; i < n; i++)
+ {
+ a[i] = b[i] * a2[i] * b2[i] * a3[i] * b3[i] * a4[i] * b4[i] * a5[i] * b5[i];
+ }
+}
+
+/* { dg-final { scan-assembler {e8,m8} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 4" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
new file mode 100644
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+/* Stores into a[i] the product of the nine int32_t inputs b, a2, b2, a3, b3, a4, b4, a5 and b5 at index i.  */
+void
+foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict a2,
+ int32_t *__restrict b2, int32_t *__restrict a3, int32_t *__restrict b3,
+ int32_t *__restrict a4, int32_t *__restrict b4, int32_t *__restrict a5,
+ int32_t *__restrict b5, int n)
+{
+ for (int i = 0; i < n; i++)
+ {
+ a[i] = b[i] * a2[i] * b2[i] * a3[i] * b3[i] * a4[i] * b4[i] * a5[i] * b5[i];
+ }
+}
+
+/* { dg-final { scan-assembler {e32,m8} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 4" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
new file mode 100644
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+/* Accumulates a[0] .. a[n - 1] into the int8_t value init and returns the result.  */
+int8_t
+foo (int8_t *__restrict a, int8_t init, int n)
+{
+ for (int i = 0; i < n; i++)
+ init += a[i];
+ return init;
+}
+
+/* { dg-final { scan-assembler {e8,m8} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 4" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
new file mode 100644
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+/* Accumulates a[0] .. a[n - 1] into the int64_t value init and returns the result.  */
+int64_t
+foo (int64_t *__restrict a, int64_t init, int n)
+{
+ for (int i = 0; i < n; i++)
+ init += a[i];
+ return init;
+}
+
+/* { dg-final { scan-assembler {e64,m8} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 4" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
new file mode 100644
@@ -0,0 +1,52 @@
+# Copyright (C) 2023-2023 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# GCC testsuite that uses the `dg.exp' driver.
+
+# Load support procs.
+load_lib gcc-dg.exp
+
+# Exit immediately if this isn't a riscv target.
+if { ![istarget riscv*-*-*] } then {
+ return
+}
+
+# If a testcase doesn't have special options, use these.
+global DEFAULT_CFLAGS
+if ![info exists DEFAULT_CFLAGS] then {
+ set DEFAULT_CFLAGS " -ansi -pedantic-errors"
+}
+
+# -march/-mabi for the fallback CFLAGS; riscv32 targets get the rv32 pair.
+set gcc_march "rv64gcv_zvfh"
+set gcc_mabi "lp64d"
+if [istarget riscv32-*-*] then {
+ set gcc_march "rv32gcv_zvfh"
+ set gcc_mabi "ilp32d"
+}
+
+# Initialize `dg'.
+dg-init
+
+# Main loop.  Every dynamic-lmul* testcase gets the dynamic LMUL flags;
+# $CFLAGS is the default-extra-flags argument, which dg-runtest applies
+# only to testcases that have no dg-options of their own.
+set CFLAGS "$DEFAULT_CFLAGS -march=$gcc_march -mabi=$gcc_mabi -O3"
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/dynamic-lmul*.\[cS\]]] \
+ "-O3 -ftree-vectorize --param riscv-autovec-lmul=dynamic" $CFLAGS
+
+# All done.
+dg-finish