@@ -394,21 +394,32 @@ compute_estimated_lmul (loop_vec_info loop_vinfo, machine_mode mode)
{
gcc_assert (GET_MODE_BITSIZE (mode).is_constant ());
int regno_alignment = riscv_get_v_regno_alignment (loop_vinfo->vector_mode);
- if (known_eq (LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo), 1U))
+ if (riscv_v_ext_vls_mode_p (loop_vinfo->vector_mode))
+ return regno_alignment;
+ else if (known_eq (LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo), 1U)
+ || LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo).is_constant ())
{
int estimated_vf = vect_vf_for_cost (loop_vinfo);
return estimated_vf * GET_MODE_BITSIZE (mode).to_constant ()
/ TARGET_MIN_VLEN;
}
- else if (regno_alignment > 1)
- return regno_alignment;
else
{
- int ratio;
- if (can_div_trunc_p (BYTES_PER_RISCV_VECTOR,
- LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo),
- &ratio))
- return TARGET_MAX_LMUL / ratio;
+ /* Estimate the VLA SLP LMUL. */
+ if (regno_alignment > RVV_M1)
+ return regno_alignment;
+ else if (mode != QImode)
+ {
+ int ratio;
+ if (can_div_trunc_p (BYTES_PER_RISCV_VECTOR,
+ GET_MODE_SIZE (loop_vinfo->vector_mode), &ratio))
+ {
+ if (ratio == 1)
+ return RVV_M4;
+ else if (ratio == 2)
+ return RVV_M2;
+ }
+ }
}
return 0;
}
@@ -540,7 +551,10 @@ update_local_live_ranges (
stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
enum stmt_vec_info_type type
= STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info));
- if (non_contiguous_memory_access_p (stmt_info))
+ if (non_contiguous_memory_access_p (stmt_info)
+ /* LOAD_LANES/STORE_LANES accesses don't need a permutation index. */
+ && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info)
+ != VMAT_LOAD_STORE_LANES)
{
/* For non-adjacent load/store STMT, we will potentially
convert it into:
@@ -578,9 +592,6 @@ update_local_live_ranges (
static bool
has_unexpected_spills_p (loop_vec_info loop_vinfo)
{
- /* We don't apply dynamic LMUL cost model on VLS modes. */
- if (!riscv_v_ext_vector_mode_p (loop_vinfo->vector_mode))
- return false;
/* Compute local program points.
It's a fast and effective computation. */
hash_map<basic_block, vec<stmt_point>> program_points_per_bb;
@@ -682,7 +693,12 @@ costs::analyze_loop_vinfo (loop_vec_info loop_vinfo)
void
costs::record_potential_unexpected_spills (loop_vec_info loop_vinfo)
{
- if (riscv_autovec_lmul == RVV_DYNAMIC)
+ /* Only apply the heuristic when LOOP_VINFO is being vectorized as a
+ VLA loop, or as a VLS loop whose number of iterations is known. */
+ if (riscv_autovec_lmul == RVV_DYNAMIC
+ && (m_cost_type == VLA_VECTOR_COST
+ || (m_cost_type == VLS_VECTOR_COST
+ && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))))
{
bool post_dom_available_p = dom_info_available_p (CDI_POST_DOMINATORS);
if (!post_dom_available_p)
@@ -86,3 +86,6 @@ foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c,
/* { dg-final { scan-assembler {e32,m1} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 3 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -58,3 +58,6 @@ foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c,
/* { dg-final { scan-assembler {e32,m1} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 3 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -86,3 +86,6 @@ foo (int8_t *__restrict a, int8_t *__restrict b, int8_t *__restrict c,
/* { dg-final { scan-assembler {e8,m1} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 3 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -116,3 +116,6 @@ foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c,
/* { dg-final { scan-assembler {e32,m1} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 3 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -144,3 +144,6 @@ foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c,
/* { dg-final { scan-assembler {e32,m1} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 3 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -145,3 +145,6 @@ foo (int8_t *__restrict a, int8_t *__restrict b, int8_t *__restrict c,
/* { dg-final { scan-assembler {e8,m1} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 3 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -43,3 +43,6 @@ foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c,
/* { dg-final { scan-assembler {e32,m1} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 3 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -46,3 +46,6 @@ foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c,
/* { dg-final { scan-assembler {e32,m2} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -46,3 +46,6 @@ foo (int8_t *__restrict a, int8_t *__restrict b, int8_t *__restrict c,
/* { dg-final { scan-assembler {e8,m2} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -46,3 +46,6 @@ foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c,
/* { dg-final { scan-assembler {e32,m2} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -44,3 +44,6 @@ foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c,
/* { dg-final { scan-assembler {e32,m2} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -47,3 +47,6 @@ foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c,
/* { dg-final { scan-assembler {e8,m8} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -30,3 +30,6 @@ foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c,
/* { dg-final { scan-assembler {e32,m4} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -30,3 +30,6 @@ foo (int8_t *__restrict a, int8_t *__restrict b, int8_t *__restrict c,
/* { dg-final { scan-assembler {e8,m4} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -42,3 +42,6 @@ void foo2 (int64_t *__restrict a,
/* { dg-final { scan-assembler {e64,m4} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -42,3 +42,6 @@ void foo2 (int16_t *__restrict a,
/* { dg-final { scan-assembler {e16,m2} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3 -ftree-vectorize --param riscv-autovec-lmul=dynamic -fselective-scheduling -fdump-tree-vect-details" } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3 -ftree-vectorize --param riscv-autovec-lmul=dynamic --param riscv-autovec-preference=scalable -fselective-scheduling -fdump-tree-vect-details" } */
#include <stdint-gcc.h>
@@ -23,3 +23,6 @@ foo (uint8_t *restrict a, uint8_t *restrict b, int n)
/* { dg-final { scan-assembler-times {csrr} 1 } } */
/* Since we don't support VLA SLP for LMUL = 8, dynamic LMUL cost model start from LMUL = 4. */
/* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 8" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -42,3 +42,6 @@ void foo2 (int8_t *__restrict a,
/* { dg-final { scan-assembler {e64,m4} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3 -ftree-vectorize --param riscv-autovec-lmul=dynamic -fselective-scheduling -fdump-tree-vect-details" } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3 -ftree-vectorize --param riscv-autovec-lmul=dynamic --param riscv-autovec-preference=scalable -fselective-scheduling -fdump-tree-vect-details" } */
#include <stdint-gcc.h>
@@ -32,3 +32,6 @@ foo (uint8_t *restrict a, uint8_t *restrict b, int n)
/* { dg-final { scan-assembler-times {csrr} 1 } } */
/* Since we don't support VLA SLP for LMUL = 8, dynamic LMUL cost model start from LMUL = 4. */
/* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 8" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -13,3 +13,6 @@ foo (int32_t *__restrict a, int32_t *__restrict b, int n)
/* { dg-final { scan-assembler {e32,m8} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -17,3 +17,6 @@ foo (int *x, int n, int res)
/* { dg-final { scan-assembler {e32,m8} } } */
/* { dg-final { scan-assembler-times {csrr} 1 } } */
/* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -42,3 +42,6 @@ void foo2 (int64_t *__restrict a,
/* { dg-final { scan-assembler {e64,m8} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3 -ftree-vectorize --param riscv-autovec-lmul=dynamic --param riscv-autovec-preference=scalable -fselective-scheduling -fdump-tree-vect-details" } */
+
+void
+foo (int *restrict a, int *restrict b, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ a[i * 8] = b[i * 8 + 7] + 1;
+ a[i * 8 + 1] = b[i * 8 + 6] + 2;
+ a[i * 8 + 2] = b[i * 8 + 5] + 3;
+ a[i * 8 + 3] = b[i * 8 + 4] + 4;
+ a[i * 8 + 4] = b[i * 8 + 3] + 5;
+ a[i * 8 + 5] = b[i * 8 + 2] + 6;
+ a[i * 8 + 6] = b[i * 8 + 1] + 7;
+ a[i * 8 + 7] = b[i * 8 + 0] + 8;
+ }
+}
+
+/* { dg-final { scan-assembler {e32,m8} } } */
+/* { dg-final { scan-assembler-times {csrr} 1 } } */
+/* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -13,3 +13,6 @@ foo (int32_t *__restrict a, int16_t *__restrict b, int n)
/* { dg-final { scan-assembler {e16,m4} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -13,3 +13,6 @@ foo (int8_t *__restrict a, int8_t *__restrict b, int n)
/* { dg-final { scan-assembler {e8,m8} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -14,3 +14,6 @@ foo (size_t *__restrict a, size_t *__restrict b, int n)
/* { dg-final { scan-assembler {e64,m8} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -20,3 +20,6 @@ foo (int8_t *__restrict a, int8_t *__restrict b, int n)
/* { dg-final { scan-assembler {e8,m8} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -18,3 +18,6 @@ foo (int8_t *__restrict a, int8_t *__restrict b, int8_t *__restrict a2,
/* { dg-final { scan-assembler {e8,m8} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -18,3 +18,6 @@ foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict a2,
/* { dg-final { scan-assembler {e32,m8} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -14,3 +14,6 @@ foo (int8_t *__restrict a, int8_t init, int n)
/* { dg-final { scan-assembler {e8,m8} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
@@ -14,3 +14,6 @@ foo (int64_t *__restrict a, int64_t init, int n)
/* { dg-final { scan-assembler {e64,m8} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
new file mode 100644
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize --param riscv-autovec-lmul=dynamic --param riscv-autovec-preference=fixed-vlmax -fdump-tree-vect-details" } */
+
+#define TYPE double
+#define N 200
+
+#include <complex.h>
+
+void addconjboth (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
+ _Complex TYPE c[restrict N])
+{
+#if defined (UNROLL)
+#pragma GCC unroll 16
+#endif
+ for (int i=0; i < N; i++)
+ c[i] = ~a[i] + ~b[i];
+}
+
+/* { dg-final { scan-assembler {e64,m4} } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4, At most 16 number of live V_REG" 1 "vect" } } */