RISC-V: Use merge approach to optimize vector permutation

Message ID 20230614042409.266841-1-juzhe.zhong@rivai.ai
State Unresolved
Headers
Series RISC-V: Use merge approach to optimize vector permutation |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

juzhe.zhong@rivai.ai June 14, 2023, 4:24 a.m. UTC
  From: Juzhe-Zhong <juzhe.zhong@rivai.ai>

This patch optimizes the permutation case that is suitable for the
merge approach.

Consider this following case:
typedef int8_t vnx16qi __attribute__((vector_size (16)));

#define MASK_16		0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31

void __attribute__ ((noipa))
merge0 (vnx16qi x, vnx16qi y, vnx16qi *out)
{
  vnx16qi v = __builtin_shufflevector ((vnx16qi) x, (vnx16qi) y, MASK_16);
  *(vnx16qi*)out = v;
} 

The gimple IR:
v_3 = VEC_PERM_EXPR <x_1(D), y_2(D), { 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31 }>;

Selector = { 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31 }, the common expression:
{ 0, nunits + 1, 2, nunits + 3, 4, nunits + 5, ...  }

For this selector, we can use vmsltu + vmerge to optimize the codegen.

Before this patch:
merge0:
        addi    a5,sp,16
        vl1re8.v        v3,0(a5)
        li      a5,31
        vsetivli        zero,16,e8,m1,ta,mu
        vmv.v.x v2,a5
        lui     a5,%hi(.LANCHOR0)
        addi    a5,a5,%lo(.LANCHOR0)
        vl1re8.v        v1,0(a5)
        vl1re8.v        v4,0(sp)
        vand.vv v1,v1,v2
        vmsgeu.vi       v0,v1,16
        vrgather.vv     v2,v4,v1
        vadd.vi v1,v1,-16
        vrgather.vv     v2,v3,v1,v0.t
        vs1r.v  v2,0(a0)
        ret

After this patch:
merge0:
        addi    a5,sp,16
        vl1re8.v        v1,0(a5)
        lui     a5,%hi(.LANCHOR0)
        addi    a5,a5,%lo(.LANCHOR0)
        vsetivli        zero,16,e8,m1,ta,ma
        vl1re8.v        v0,0(a5)
        vl1re8.v        v2,0(sp)
        vmsltu.vi       v0,v0,16
        vmerge.vvm      v1,v1,v2,v0
        vs1r.v  v1,0(a0)
        ret

The key of this optimization is that:
1. mask = vmsltu (selector, nunits)
2. result = vmerge (op0, op1, mask)

gcc/ChangeLog:

        * config/riscv/riscv-v.cc (shuffle_merge_patterns): New pattern.
        (expand_vec_perm_const_1): Add merge optimization.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/autovec/vls-vlmax/merge-1.c: New test.
        * gcc.target/riscv/rvv/autovec/vls-vlmax/merge-2.c: New test.
        * gcc.target/riscv/rvv/autovec/vls-vlmax/merge-3.c: New test.
        * gcc.target/riscv/rvv/autovec/vls-vlmax/merge-4.c: New test.
        * gcc.target/riscv/rvv/autovec/vls-vlmax/merge-5.c: New test.
        * gcc.target/riscv/rvv/autovec/vls-vlmax/merge-6.c: New test.
        * gcc.target/riscv/rvv/autovec/vls-vlmax/merge-7.c: New test.
        * gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-1.c: New test.
        * gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-2.c: New test.
        * gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-3.c: New test.
        * gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-4.c: New test.
        * gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-5.c: New test.
        * gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-6.c: New test.
        * gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-7.c: New test.

---
 gcc/config/riscv/riscv-v.cc                   |  52 +++++
 .../riscv/rvv/autovec/vls-vlmax/merge-1.c     | 101 +++++++++
 .../riscv/rvv/autovec/vls-vlmax/merge-2.c     | 103 +++++++++
 .../riscv/rvv/autovec/vls-vlmax/merge-3.c     | 109 +++++++++
 .../riscv/rvv/autovec/vls-vlmax/merge-4.c     | 122 ++++++++++
 .../riscv/rvv/autovec/vls-vlmax/merge-5.c     |  76 +++++++
 .../riscv/rvv/autovec/vls-vlmax/merge-6.c     |  51 +++++
 .../riscv/rvv/autovec/vls-vlmax/merge-7.c     |  25 +++
 .../riscv/rvv/autovec/vls-vlmax/merge_run-1.c | 119 ++++++++++
 .../riscv/rvv/autovec/vls-vlmax/merge_run-2.c | 121 ++++++++++
 .../riscv/rvv/autovec/vls-vlmax/merge_run-3.c | 150 +++++++++++++
 .../riscv/rvv/autovec/vls-vlmax/merge_run-4.c | 210 ++++++++++++++++++
 .../riscv/rvv/autovec/vls-vlmax/merge_run-5.c |  89 ++++++++
 .../riscv/rvv/autovec/vls-vlmax/merge_run-6.c |  59 +++++
 .../riscv/rvv/autovec/vls-vlmax/merge_run-7.c |  29 +++
 15 files changed, 1416 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-6.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-7.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-6.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-7.c
  

Comments

Robin Dapp June 14, 2023, 3 p.m. UTC | #1
Hi Juzhe,

the general method seems sane and useful (it's not very complicated).
I was just distracted by

> Selector = { 0, 17, 2, 19, 4, 21, 6, 23, 8, 9, 10, 27, 12, 29, 14, 31 }, the common expression:
> { 0, nunits + 1, 1, nunits + 2, 2, nunits + 3, ...  }
> 
> For this selector, we can use vmsltu + vmerge to optimize the codegen.

because it's actually { 0, nunits + 1, 2, nunits + 3, ... } or maybe
{ 0, nunits, 0, nunits, ... } + { 0, 1, 2, 3, ..., nunits - 1 }.

Because of the ascending/monotonic? selector structure we can use vmerge
instead of vrgather.

> +/* Recognize the patterns that we can use merge operation to shuffle the
> +   vectors. The value of Each element (index i) in selector can only be
> +   either i or nunits + i.
> +
> +   E.g.
> +   v = VEC_PERM_EXPR (v0, v1, selector),
> +   selector = { 0, nunits + 1, 1, nunits + 2, 2, nunits + 3, ...  }

Same.

> +
> +   We can transform such pattern into:
> +
> +   v = vcond_mask (v0, v1, mask),
> +   mask = { 0, 1, 0, 1, 0, 1, ... }.  */
> +
> +static bool
> +shuffle_merge_patterns (struct expand_vec_perm_d *d)
> +{
> +  machine_mode vmode = d->vmode;
> +  machine_mode sel_mode = related_int_vector_mode (vmode).require ();
> +  int n_patterns = d->perm.encoding ().npatterns ();
> +  poly_int64 vec_len = d->perm.length ();
> +
> +  for (int i = 0; i < n_patterns; ++i)
> +    if (!known_eq (d->perm[i], i) && !known_eq (d->perm[i], vec_len + i))
> +      return false;
> +
> +  for (int i = n_patterns; i < n_patterns * 2; i++)
> +    if (!d->perm.series_p (i, n_patterns, i, n_patterns)
> +	&& !d->perm.series_p (i, n_patterns, vec_len + i, n_patterns))
> +      return false;

Maybe add a comment that we check that the pattern is actually monotonic
or however you prefer to call it?

I didn't go through all tests in detail but skimmed several.  All in all
looks good to me.

Regards
 Robin
  
Jeff Law June 14, 2023, 7:10 p.m. UTC | #2
On 6/14/23 09:00, Robin Dapp wrote:
> Hi Juzhe,
> 
> the general method seems sane and useful (it's not very complicated).
> I was just distracted by
> 
>> Selector = { 0, 17, 2, 19, 4, 21, 6, 23, 8, 9, 10, 27, 12, 29, 14, 31 }, the common expression:
>> { 0, nunits + 1, 1, nunits + 2, 2, nunits + 3, ...  }
>>
>> For this selector, we can use vmsltu + vmerge to optimize the codegen.
> 
> because it's actually { 0, nunits + 1, 2, nunits + 3, ... } or maybe
> { 0, nunits, 0, nunits, ... } + { 0, 1, 2, 3, ..., nunits - 1 }.
> 
> Because of the ascending/monotonic? selector structure we can use vmerge
> instead of vrgather.
> 
>> +/* Recognize the patterns that we can use merge operation to shuffle the
>> +   vectors. The value of Each element (index i) in selector can only be
>> +   either i or nunits + i.
>> +
>> +   E.g.
>> +   v = VEC_PERM_EXPR (v0, v1, selector),
>> +   selector = { 0, nunits + 1, 1, nunits + 2, 2, nunits + 3, ...  }
> 
> Same.
> 
>> +
>> +   We can transform such pattern into:
>> +
>> +   v = vcond_mask (v0, v1, mask),
>> +   mask = { 0, 1, 0, 1, 0, 1, ... }.  */
>> +
>> +static bool
>> +shuffle_merge_patterns (struct expand_vec_perm_d *d)
>> +{
>> +  machine_mode vmode = d->vmode;
>> +  machine_mode sel_mode = related_int_vector_mode (vmode).require ();
>> +  int n_patterns = d->perm.encoding ().npatterns ();
>> +  poly_int64 vec_len = d->perm.length ();
>> +
>> +  for (int i = 0; i < n_patterns; ++i)
>> +    if (!known_eq (d->perm[i], i) && !known_eq (d->perm[i], vec_len + i))
>> +      return false;
>> +
>> +  for (int i = n_patterns; i < n_patterns * 2; i++)
>> +    if (!d->perm.series_p (i, n_patterns, i, n_patterns)
>> +	&& !d->perm.series_p (i, n_patterns, vec_len + i, n_patterns))
>> +      return false;
> 
> Maybe add a comment that we check that the pattern is actually monotonic
> or however you prefet to call it?
> 
> I didn't go through all tests in detail but skimmed several.  All in all
> looks good to me.
So I think that means we want a V2 for the comment updates.  But I think 
we can go ahead and consider V2 pre-approved.

jeff
  
Li, Pan2 via Gcc-patches June 15, 2023, 1:55 a.m. UTC | #3
Addressed the comments in PATCH v2 as below.

https://gcc.gnu.org/pipermail/gcc-patches/2023-June/621789.html

Pan

-----Original Message-----
From: Gcc-patches <gcc-patches-bounces+pan2.li=intel.com@gcc.gnu.org> On Behalf Of Jeff Law via Gcc-patches
Sent: Thursday, June 15, 2023 3:11 AM
To: Robin Dapp <rdapp.gcc@gmail.com>; juzhe.zhong@rivai.ai; gcc-patches@gcc.gnu.org
Cc: kito.cheng@gmail.com; kito.cheng@sifive.com; palmer@dabbelt.com; palmer@rivosinc.com
Subject: Re: [PATCH] RISC-V: Use merge approach to optimize vector permutation



On 6/14/23 09:00, Robin Dapp wrote:
> Hi Juzhe,
> 
> the general method seems sane and useful (it's not very complicated).
> I was just distracted by
> 
>> Selector = { 0, 17, 2, 19, 4, 21, 6, 23, 8, 9, 10, 27, 12, 29, 14, 31 }, the common expression:
>> { 0, nunits + 1, 1, nunits + 2, 2, nunits + 3, ...  }
>>
>> For this selector, we can use vmsltu + vmerge to optimize the codegen.
> 
> because it's actually { 0, nunits + 1, 2, nunits + 3, ... } or maybe
> { 0, nunits, 0, nunits, ... } + { 0, 1, 2, 3, ..., nunits - 1 }.
> 
> Because of the ascending/monotonic? selector structure we can use vmerge
> instead of vrgather.
> 
>> +/* Recognize the patterns that we can use merge operation to shuffle the
>> +   vectors. The value of Each element (index i) in selector can only be
>> +   either i or nunits + i.
>> +
>> +   E.g.
>> +   v = VEC_PERM_EXPR (v0, v1, selector),
>> +   selector = { 0, nunits + 1, 1, nunits + 2, 2, nunits + 3, ...  }
> 
> Same.
> 
>> +
>> +   We can transform such pattern into:
>> +
>> +   v = vcond_mask (v0, v1, mask),
>> +   mask = { 0, 1, 0, 1, 0, 1, ... }.  */
>> +
>> +static bool
>> +shuffle_merge_patterns (struct expand_vec_perm_d *d)
>> +{
>> +  machine_mode vmode = d->vmode;
>> +  machine_mode sel_mode = related_int_vector_mode (vmode).require ();
>> +  int n_patterns = d->perm.encoding ().npatterns ();
>> +  poly_int64 vec_len = d->perm.length ();
>> +
>> +  for (int i = 0; i < n_patterns; ++i)
>> +    if (!known_eq (d->perm[i], i) && !known_eq (d->perm[i], vec_len + i))
>> +      return false;
>> +
>> +  for (int i = n_patterns; i < n_patterns * 2; i++)
>> +    if (!d->perm.series_p (i, n_patterns, i, n_patterns)
>> +	&& !d->perm.series_p (i, n_patterns, vec_len + i, n_patterns))
>> +      return false;
> 
> Maybe add a comment that we check that the pattern is actually monotonic
> or however you prefet to call it?
> 
> I didn't go through all tests in detail but skimmed several.  All in all
> looks good to me.
So I think that means we want a V2 for the comment updates.  But I think 
we can go ahead and consider V2 pre-approved.

jeff
  

Patch

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index e07d5c2901a..2b3aa2d125d 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2382,6 +2382,56 @@  struct expand_vec_perm_d
   bool testing_p;
 };
 
+/* Recognize the patterns that we can use merge operation to shuffle the
+   vectors. The value of each element (index i) in selector can only be
+   either i or nunits + i.
+
+   E.g.
+   v = VEC_PERM_EXPR (v0, v1, selector),
+   selector = { 0, nunits + 1, 1, nunits + 2, 2, nunits + 3, ...  }
+
+   We can transform such pattern into:
+
+   v = vcond_mask (v0, v1, mask),
+   mask = { 0, 1, 0, 1, 0, 1, ... }.  */
+
+static bool
+shuffle_merge_patterns (struct expand_vec_perm_d *d)
+{
+  machine_mode vmode = d->vmode;
+  machine_mode sel_mode = related_int_vector_mode (vmode).require ();
+  int n_patterns = d->perm.encoding ().npatterns ();
+  poly_int64 vec_len = d->perm.length ();
+
+  for (int i = 0; i < n_patterns; ++i)
+    if (!known_eq (d->perm[i], i) && !known_eq (d->perm[i], vec_len + i))
+      return false;
+
+  for (int i = n_patterns; i < n_patterns * 2; i++)
+    if (!d->perm.series_p (i, n_patterns, i, n_patterns)
+	&& !d->perm.series_p (i, n_patterns, vec_len + i, n_patterns))
+      return false;
+
+  if (d->testing_p)
+    return true;
+
+  machine_mode mask_mode = get_mask_mode (vmode).require ();
+  rtx mask = gen_reg_rtx (mask_mode);
+
+  rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm);
+
+  /* MASK = SELECTOR < NUNITS ? 1 : 0.  */
+  rtx x = gen_int_mode (vec_len, GET_MODE_INNER (sel_mode));
+  insn_code icode = code_for_pred_cmp_scalar (sel_mode);
+  rtx cmp = gen_rtx_fmt_ee (LTU, mask_mode, sel, x);
+  rtx ops[] = {mask, cmp, sel, x};
+  emit_vlmax_cmp_insn (icode, ops);
+
+  /* TARGET = MASK ? OP0 : OP1.  */
+  emit_insn (gen_vcond_mask (vmode, vmode, d->target, d->op0, d->op1, mask));
+  return true;
+}
+
 /* Recognize decompress patterns:
 
    1. VEC_PERM_EXPR op0 and op1
@@ -2503,6 +2553,8 @@  expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
     {
       if (d->vmode == d->op_mode)
 	{
+	  if (shuffle_merge_patterns (d))
+	    return true;
 	  if (shuffle_decompress_patterns (d))
 	    return true;
 	  if (shuffle_generic_patterns (d))
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-1.c
new file mode 100644
index 00000000000..efeb23e9719
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-1.c
@@ -0,0 +1,101 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfhmin -mabi=lp64d -O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include <stdint-gcc.h>
+
+typedef int8_t vnx16qi __attribute__((vector_size (16)));
+typedef int16_t vnx8hi __attribute__((vector_size (16)));
+typedef int32_t vnx4si __attribute__((vector_size (16)));
+typedef int64_t vnx2di __attribute__((vector_size (16)));
+typedef uint8_t vnx16uqi __attribute__((vector_size (16)));
+typedef uint16_t vnx8uhi __attribute__((vector_size (16)));
+typedef uint32_t vnx4usi __attribute__((vector_size (16)));
+typedef uint64_t vnx2udi __attribute__((vector_size (16)));
+
+typedef _Float16 vnx8hf __attribute__((vector_size (16)));
+typedef float vnx4sf __attribute__((vector_size (16)));
+typedef double vnx2df __attribute__((vector_size (16)));
+
+#define MASK_16		0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31
+#define MASK_8		0, 9, 2, 11, 4, 13, 6, 15
+#define MASK_4		0, 5, 2, 7
+#define MASK_2		0, 3
+
+void __attribute__ ((noipa))
+merge0 (vnx16qi x, vnx16qi y, vnx16qi *out)
+{
+  vnx16qi v = __builtin_shufflevector ((vnx16qi) x, (vnx16qi) y, MASK_16);
+  *(vnx16qi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge1 (vnx16uqi x, vnx16uqi y, vnx16uqi *out)
+{
+  vnx16uqi v = __builtin_shufflevector ((vnx16uqi) x, (vnx16uqi) y, MASK_16);
+  *(vnx16uqi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge2 (vnx8hi x, vnx8hi y, vnx8hi *out)
+{
+  vnx8hi v = __builtin_shufflevector ((vnx8hi) x, (vnx8hi) y, MASK_8);
+  *(vnx8hi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge3 (vnx8uhi x, vnx8uhi y, vnx8uhi *out)
+{
+  vnx8uhi v = __builtin_shufflevector ((vnx8uhi) x, (vnx8uhi) y, MASK_8);
+  *(vnx8uhi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge4 (vnx4si x, vnx4si y, vnx4si *out)
+{
+  vnx4si v = __builtin_shufflevector ((vnx4si) x, (vnx4si) y, MASK_4);
+  *(vnx4si*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge5 (vnx4usi x, vnx4usi y, vnx4usi *out)
+{
+  vnx4usi v = __builtin_shufflevector ((vnx4usi) x, (vnx4usi) y, MASK_4);
+  *(vnx4usi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge6 (vnx2di x, vnx2di y, vnx2di *out)
+{
+  vnx2di v = __builtin_shufflevector ((vnx2di) x, (vnx2di) y, MASK_2);
+  *(vnx2di*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge7 (vnx2udi x, vnx2udi y, vnx2udi *out)
+{
+  vnx2udi v = __builtin_shufflevector ((vnx2udi) x, (vnx2udi) y, MASK_2);
+  *(vnx2udi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge8 (vnx8hf x, vnx8hf y, vnx8hf *out)
+{
+  vnx8hf v = __builtin_shufflevector ((vnx8hf) x, (vnx8hf) y, MASK_8);
+  *(vnx8hf*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge9 (vnx4sf x, vnx4sf y, vnx4sf *out)
+{
+  vnx4sf v = __builtin_shufflevector ((vnx4sf) x, (vnx4sf) y, MASK_4);
+  *(vnx4sf*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge10 (vnx2df x, vnx2df y, vnx2df *out)
+{
+  vnx2df v = __builtin_shufflevector ((vnx2df) x, (vnx2df) y, MASK_2);
+  *(vnx2df*)out = v;
+}
+
+/* { dg-final { scan-assembler-times {\tvmerge.vvm} 11 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-2.c
new file mode 100644
index 00000000000..35b2aa8aee9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-2.c
@@ -0,0 +1,103 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfhmin -mabi=lp64d -O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include <stdint-gcc.h>
+
+typedef int8_t vnx32qi __attribute__((vector_size (32)));
+typedef int16_t vnx16hi __attribute__((vector_size (32)));
+typedef int32_t vnx8si __attribute__((vector_size (32)));
+typedef int64_t vnx4di __attribute__((vector_size (32)));
+typedef uint8_t vnx32uqi __attribute__((vector_size (32)));
+typedef uint16_t vnx16uhi __attribute__((vector_size (32)));
+typedef uint32_t vnx8usi __attribute__((vector_size (32)));
+typedef uint64_t vnx4udi __attribute__((vector_size (32)));
+
+typedef _Float16 vnx16hf __attribute__((vector_size (32)));
+typedef float vnx8sf __attribute__((vector_size (32)));
+typedef double vnx4df __attribute__((vector_size (32)));
+
+#define MASK_32		0, 33, 2, 35, 4, 37, 6, 39, 8, 41,			\
+			  10, 43, 12, 45, 14, 47, 16, 49, 18, 51, 		\
+			  20, 53, 22, 55, 24, 57, 26, 59, 28, 61, 30, 63 
+#define MASK_16		0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31 
+#define MASK_8		0, 9, 2, 11, 4, 13, 6, 15 
+#define MASK_4		0, 5, 2, 7 
+
+void __attribute__ ((noipa))
+merge0 (vnx32qi x, vnx32qi y, vnx32qi *out)
+{
+  vnx32qi v = __builtin_shufflevector ((vnx32qi) x, (vnx32qi) y, MASK_32);
+  *(vnx32qi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge1 (vnx32uqi x, vnx32uqi y, vnx32uqi *out)
+{
+  vnx32uqi v = __builtin_shufflevector ((vnx32uqi) x, (vnx32uqi) y, MASK_32);
+  *(vnx32uqi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge2 (vnx16hi x, vnx16hi y, vnx16hi *out)
+{
+  vnx16hi v = __builtin_shufflevector ((vnx16hi) x, (vnx16hi) y, MASK_16);
+  *(vnx16hi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge3 (vnx16uhi x, vnx16uhi y, vnx16uhi *out)
+{
+  vnx16uhi v = __builtin_shufflevector ((vnx16uhi) x, (vnx16uhi) y, MASK_16);
+  *(vnx16uhi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge4 (vnx8si x, vnx8si y, vnx8si *out)
+{
+  vnx8si v = __builtin_shufflevector ((vnx8si) x, (vnx8si) y, MASK_8);
+  *(vnx8si*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge5 (vnx8usi x, vnx8usi y, vnx8usi *out)
+{
+  vnx8usi v = __builtin_shufflevector ((vnx8usi) x, (vnx8usi) y, MASK_8);
+  *(vnx8usi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge6 (vnx4di x, vnx4di y, vnx4di *out)
+{
+  vnx4di v = __builtin_shufflevector ((vnx4di) x, (vnx4di) y, MASK_4);
+  *(vnx4di*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge7 (vnx4udi x, vnx4udi y, vnx4udi *out)
+{
+  vnx4udi v = __builtin_shufflevector ((vnx4udi) x, (vnx4udi) y, MASK_4);
+  *(vnx4udi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge8 (vnx16hf x, vnx16hf y, vnx16hf *out)
+{
+  vnx16hf v = __builtin_shufflevector ((vnx16hf) x, (vnx16hf) y, MASK_16);
+  *(vnx16hf*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge9 (vnx8sf x, vnx8sf y, vnx8sf *out)
+{
+  vnx8sf v = __builtin_shufflevector ((vnx8sf) x, (vnx8sf) y, MASK_8);
+  *(vnx8sf*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge10 (vnx4df x, vnx4df y, vnx4df *out)
+{
+  vnx4df v = __builtin_shufflevector ((vnx4df) x, (vnx4df) y, MASK_4);
+  *(vnx4df*)out = v;
+}
+
+/* { dg-final { scan-assembler-times {\tvmerge.vvm} 11 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-3.c
new file mode 100644
index 00000000000..957d5b26fdc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-3.c
@@ -0,0 +1,109 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfhmin -mabi=lp64d -O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include <stdint-gcc.h>
+
+typedef int8_t vnx64qi __attribute__((vector_size (64)));
+typedef int16_t vnx32hi __attribute__((vector_size (64)));
+typedef int32_t vnx16si __attribute__((vector_size (64)));
+typedef int64_t vnx8di __attribute__((vector_size (64)));
+typedef uint8_t vnx64uqi __attribute__((vector_size (64)));
+typedef uint16_t vnx32uhi __attribute__((vector_size (64)));
+typedef uint32_t vnx16usi __attribute__((vector_size (64)));
+typedef uint64_t vnx8udi __attribute__((vector_size (64)));
+
+typedef _Float16 vnx32hf __attribute__((vector_size (64)));
+typedef float vnx16sf __attribute__((vector_size (64)));
+typedef double vnx8df __attribute__((vector_size (64)));
+
+#define MASK_64		0, 65, 2, 67, 4, 69, 6, 71, 8, 73,			\
+			  10, 75, 12, 77, 14, 79, 16, 81, 18, 83, 		\
+			  20, 85, 22, 87, 24, 89, 26, 91, 28, 93, 30, 95,    \
+        32, 97, 34, 99, 36, 101, 38, 103, 40, 105,  \
+        42, 107, 44, 109, 46, 111, 48, 113, 50, 115, \
+        52, 117, 54, 119, 56, 121, 58, 123, 60, 125, \
+        62, 127
+#define MASK_32		0, 33, 2, 35, 4, 37, 6, 39, 8, 41,			\
+			  10, 43, 12, 45, 14, 47, 16, 49, 18, 51, 		\
+			  20, 53, 22, 55, 24, 57, 26, 59, 28, 61, 30, 63
+#define MASK_16		0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31
+#define MASK_8		0, 9, 2, 11, 4, 13, 6, 15
+
+void __attribute__ ((noipa))
+merge0 (vnx64qi x, vnx64qi y, vnx64qi *out)
+{
+  vnx64qi v = __builtin_shufflevector ((vnx64qi) x, (vnx64qi) y, MASK_64);
+  *(vnx64qi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge1 (vnx64uqi x, vnx64uqi y, vnx64uqi *out)
+{
+  vnx64uqi v = __builtin_shufflevector ((vnx64uqi) x, (vnx64uqi) y, MASK_64);
+  *(vnx64uqi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge2 (vnx32hi x, vnx32hi y, vnx32hi *out)
+{
+  vnx32hi v = __builtin_shufflevector ((vnx32hi) x, (vnx32hi) y, MASK_32);
+  *(vnx32hi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge3 (vnx32uhi x, vnx32uhi y, vnx32uhi *out)
+{
+  vnx32uhi v = __builtin_shufflevector ((vnx32uhi) x, (vnx32uhi) y, MASK_32);
+  *(vnx32uhi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge4 (vnx16si x, vnx16si y, vnx16si *out)
+{
+  vnx16si v = __builtin_shufflevector ((vnx16si) x, (vnx16si) y, MASK_16);
+  *(vnx16si*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge5 (vnx16usi x, vnx16usi y, vnx16usi *out)
+{
+  vnx16usi v = __builtin_shufflevector ((vnx16usi) x, (vnx16usi) y, MASK_16);
+  *(vnx16usi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge6 (vnx8di x, vnx8di y, vnx8di *out)
+{
+  vnx8di v = __builtin_shufflevector ((vnx8di) x, (vnx8di) y, MASK_8);
+  *(vnx8di*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge7 (vnx8udi x, vnx8udi y, vnx8udi *out)
+{
+  vnx8udi v = __builtin_shufflevector ((vnx8udi) x, (vnx8udi) y, MASK_8);
+  *(vnx8udi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge8 (vnx32hf x, vnx32hf y, vnx32hf *out)
+{
+  vnx32hf v = __builtin_shufflevector ((vnx32hf) x, (vnx32hf) y, MASK_32);
+  *(vnx32hf*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge9 (vnx16sf x, vnx16sf y, vnx16sf *out)
+{
+  vnx16sf v = __builtin_shufflevector ((vnx16sf) x, (vnx16sf) y, MASK_16);
+  *(vnx16sf*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge10 (vnx8df x, vnx8df y, vnx8df *out)
+{
+  vnx8df v = __builtin_shufflevector ((vnx8df) x, (vnx8df) y, MASK_8);
+  *(vnx8df*)out = v;
+}
+
+/* { dg-final { scan-assembler-times {\tvmerge.vvm} 11 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-4.c
new file mode 100644
index 00000000000..398d0dcc649
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-4.c
@@ -0,0 +1,122 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfhmin -mabi=lp64d -O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include <stdint-gcc.h>
+
+typedef int8_t vnx128qi __attribute__((vector_size (128)));
+typedef int16_t vnx64hi __attribute__((vector_size (128)));
+typedef int32_t vnx32si __attribute__((vector_size (128)));
+typedef int64_t vnx16di __attribute__((vector_size (128)));
+typedef uint8_t vnx128uqi __attribute__((vector_size (128)));
+typedef uint16_t vnx64uhi __attribute__((vector_size (128)));
+typedef uint32_t vnx32usi __attribute__((vector_size (128)));
+typedef uint64_t vnx16udi __attribute__((vector_size (128)));
+
+typedef _Float16 vnx64hf __attribute__((vector_size (128)));
+typedef float vnx32sf __attribute__((vector_size (128)));
+typedef double vnx16df __attribute__((vector_size (128)));
+
+#define MASK_128		0, 129, 2, 131, 4, 133, 6, 135, 8, 137,			\
+			  10, 139, 12, 141, 14, 143, 16, 145, 18, 147, 		\
+			  20, 149, 22, 151, 24, 153, 26, 155, 28, 157, 30, 159,    \
+        32, 161, 34, 163, 36, 165, 38, 167, 40, 169,  \
+        42, 171, 44, 173, 46, 175, 48, 177, 50, 179, \
+        52, 181, 54, 183, 56, 185, 58, 187, 60, 189, \
+        62, 191, \
+        64, 193, 66, 195, 68, 197, 70, 199, 72, 201,		\
+			  74, 203, 76, 205, 78, 207, 80, 209, 82, 211, 		\
+			  84, 213, 86, 215, 88, 217, 90, 219, 92, 221, 94, 223,    \
+        96, 225, 98, 227, 100, 229, 102, 231, 104, 233,  \
+        106, 235, 108, 237, 110, 239, 112, 241, 114, 243, \
+        116, 245, 118, 247, 120, 249, 122, 251, 124, 253, \
+        126, 255
+#define MASK_64		0, 65, 2, 67, 4, 69, 6, 71, 8, 73,			\
+			  10, 75, 12, 77, 14, 79, 16, 81, 18, 83, 		\
+			  20, 85, 22, 87, 24, 89, 26, 91, 28, 93, 30, 95,    \
+        32, 97, 34, 99, 36, 101, 38, 103, 40, 105,  \
+        42, 107, 44, 109, 46, 111, 48, 113, 50, 115, \
+        52, 117, 54, 119, 56, 121, 58, 123, 60, 125, \
+        62, 127
+#define MASK_32		0, 33, 2, 35, 4, 37, 6, 39, 8, 41,			\
+			  10, 43, 12, 45, 14, 47, 16, 49, 18, 51, 		\
+			  20, 53, 22, 55, 24, 57, 26, 59, 28, 61, 30, 63
+#define MASK_16		0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31
+
+void __attribute__ ((noipa))
+merge0 (vnx128qi x, vnx128qi y, vnx128qi *out)
+{
+  vnx128qi v = __builtin_shufflevector ((vnx128qi) x, (vnx128qi) y, MASK_128);
+  *(vnx128qi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge1 (vnx128uqi x, vnx128uqi y, vnx128uqi *out)
+{
+  vnx128uqi v = __builtin_shufflevector ((vnx128uqi) x, (vnx128uqi) y, MASK_128);
+  *(vnx128uqi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge2 (vnx64hi x, vnx64hi y, vnx64hi *out)
+{
+  vnx64hi v = __builtin_shufflevector ((vnx64hi) x, (vnx64hi) y, MASK_64);
+  *(vnx64hi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge3 (vnx64uhi x, vnx64uhi y, vnx64uhi *out)
+{
+  vnx64uhi v = __builtin_shufflevector ((vnx64uhi) x, (vnx64uhi) y, MASK_64);
+  *(vnx64uhi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge4 (vnx32si x, vnx32si y, vnx32si *out)
+{
+  vnx32si v = __builtin_shufflevector ((vnx32si) x, (vnx32si) y, MASK_32);
+  *(vnx32si*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge5 (vnx32usi x, vnx32usi y, vnx32usi *out)
+{
+  vnx32usi v = __builtin_shufflevector ((vnx32usi) x, (vnx32usi) y, MASK_32);
+  *(vnx32usi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge6 (vnx16di x, vnx16di y, vnx16di *out)
+{
+  vnx16di v = __builtin_shufflevector ((vnx16di) x, (vnx16di) y, MASK_16);
+  *(vnx16di*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge7 (vnx16udi x, vnx16udi y, vnx16udi *out)
+{
+  vnx16udi v = __builtin_shufflevector ((vnx16udi) x, (vnx16udi) y, MASK_16);
+  *(vnx16udi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge8 (vnx64hf x, vnx64hf y, vnx64hf *out)
+{
+  vnx64hf v = __builtin_shufflevector ((vnx64hf) x, (vnx64hf) y, MASK_64);
+  *(vnx64hf*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge9 (vnx32sf x, vnx32sf y, vnx32sf *out)
+{
+  vnx32sf v = __builtin_shufflevector ((vnx32sf) x, (vnx32sf) y, MASK_32);
+  *(vnx32sf*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge10 (vnx16df x, vnx16df y, vnx16df *out)
+{
+  vnx16df v = __builtin_shufflevector ((vnx16df) x, (vnx16df) y, MASK_16);
+  *(vnx16df*)out = v;
+}
+
+/* dg-final scan-assembler-times {\tvmerge.vvm} 11 */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-5.c
new file mode 100644
index 00000000000..4d1b9e29b7d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-5.c
@@ -0,0 +1,76 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfhmin -mabi=lp64d -O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include <stdint-gcc.h>
+
+typedef int8_t vnx8qi __attribute__((vector_size (8)));
+typedef int16_t vnx4hi __attribute__((vector_size (8)));
+typedef int32_t vnx2si __attribute__((vector_size (8)));
+typedef uint8_t vnx8uqi __attribute__((vector_size (8)));
+typedef uint16_t vnx4uhi __attribute__((vector_size (8)));
+typedef uint32_t vnx2usi __attribute__((vector_size (8)));
+
+typedef _Float16 vnx4hf __attribute__((vector_size (8)));
+typedef float vnx2sf __attribute__((vector_size (8)));
+
+#define MASK_8		0, 9, 2, 11, 4, 13, 6, 15
+#define MASK_4		0, 5, 2, 7
+#define MASK_2		0, 3
+
+void __attribute__ ((noipa))
+merge0 (vnx8qi x, vnx8qi y, vnx8qi *out)
+{
+  vnx8qi v = __builtin_shufflevector ((vnx8qi) x, (vnx8qi) y, MASK_8);
+  *(vnx8qi *)out = v;
+}
+
+void __attribute__ ((noipa))
+merge1 (vnx8uqi x, vnx8uqi y, vnx8uqi *out)
+{
+  vnx8uqi v = __builtin_shufflevector ((vnx8uqi) x, (vnx8uqi) y, MASK_8);
+  *(vnx8uqi *)out = v;
+}
+
+void __attribute__ ((noipa))
+merge2 (vnx4hi x, vnx4hi y, vnx4hi *out)
+{
+  vnx4hi v = __builtin_shufflevector ((vnx4hi) x, (vnx4hi) y, MASK_4);
+  *(vnx4hi *)out = v;
+}
+
+void __attribute__ ((noipa))
+merge3 (vnx4uhi x, vnx4uhi y, vnx4uhi *out)
+{
+  vnx4uhi v = __builtin_shufflevector ((vnx4uhi) x, (vnx4uhi) y, MASK_4);
+  *(vnx4uhi *)out = v;
+}
+
+void __attribute__ ((noipa))
+merge4 (vnx2si x, vnx2si y, vnx2si *out)
+{
+  vnx2si v = __builtin_shufflevector ((vnx2si) x, (vnx2si) y, MASK_2);
+  *(vnx2si *)out = v;
+}
+
+void __attribute__ ((noipa))
+merge5 (vnx2usi x, vnx2usi y, vnx2usi *out)
+{
+  vnx2usi v = __builtin_shufflevector ((vnx2usi) x, (vnx2usi) y, MASK_2);
+  *(vnx2usi *)out = v;
+}
+
+void __attribute__ ((noipa))
+merge6 (vnx4hf x, vnx4hf y, vnx4hf *out)
+{
+  vnx4hf v = __builtin_shufflevector ((vnx4hf) x, (vnx4hf) y, MASK_4);
+  *(vnx4hf *)out = v;
+}
+
+void __attribute__ ((noipa))
+merge7 (vnx2sf x, vnx2sf y, vnx2sf *out)
+{
+  vnx2sf v = __builtin_shufflevector ((vnx2sf) x, (vnx2sf) y, MASK_2);
+  *(vnx2sf *)out = v;
+}
+
+/* { dg-final { scan-assembler-times {\tvmerge.vvm} 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-6.c
new file mode 100644
index 00000000000..43acea6c345
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-6.c
@@ -0,0 +1,51 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfhmin -mabi=lp64d -O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include <stdint-gcc.h>
+
+typedef int8_t vnx4qi __attribute__((vector_size (4)));
+typedef int16_t vnx2hi __attribute__((vector_size (4)));
+typedef uint8_t vnx4uqi __attribute__((vector_size (4)));
+typedef uint16_t vnx2uhi __attribute__((vector_size (4)));
+
+typedef _Float16 vnx2hf __attribute__((vector_size (4)));
+
+#define MASK_4		0, 5, 2, 7
+#define MASK_2		0, 3
+
+void __attribute__ ((noipa))
+merge0 (vnx4qi x, vnx4qi y, vnx4qi *out) /* int8x4: even lanes from x, odd lanes from y */
+{
+  vnx4qi v = __builtin_shufflevector ((vnx4qi) x, (vnx4qi) y, MASK_4);
+  *(vnx4qi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge1 (vnx4uqi x, vnx4uqi y, vnx4uqi *out) /* uint8x4 variant of merge0 */
+{
+  vnx4uqi v = __builtin_shufflevector ((vnx4uqi) x, (vnx4uqi) y, MASK_4);
+  *(vnx4uqi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge2 (vnx2hi x, vnx2hi y, vnx2hi *out) /* int16x2: lane 0 from x, lane 1 from y */
+{
+  vnx2hi v = __builtin_shufflevector ((vnx2hi) x, (vnx2hi) y, MASK_2);
+  *(vnx2hi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge3 (vnx2uhi x, vnx2uhi y, vnx2uhi *out) /* uint16x2 variant of merge2 */
+{
+  vnx2uhi v = __builtin_shufflevector ((vnx2uhi) x, (vnx2uhi) y, MASK_2);
+  *(vnx2uhi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge6 (vnx2hf x, vnx2hf y, vnx2hf *out) /* _Float16x2: lane 0 from x, lane 1 from y */
+{
+  vnx2hf v = __builtin_shufflevector ((vnx2hf) x, (vnx2hf) y, MASK_2);
+  *(vnx2hf*)out = v;
+}
+
+/* { dg-final { scan-assembler-times {\tvmerge.vvm} 5 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-7.c
new file mode 100644
index 00000000000..2f38c3d13f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-7.c
@@ -0,0 +1,25 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfhmin -mabi=lp64d -O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include <stdint-gcc.h>
+
+typedef int8_t vnx2qi __attribute__((vector_size (2)));
+typedef uint8_t vnx2uqi __attribute__((vector_size (2)));
+
+#define MASK_2		0, 3
+
+void __attribute__ ((noipa))
+merge0 (vnx2qi x, vnx2qi y, vnx2qi *out) /* int8x2: lane 0 from x, lane 1 from y */
+{
+  vnx2qi v = __builtin_shufflevector ((vnx2qi) x, (vnx2qi) y, MASK_2);
+  *(vnx2qi *)out = v;
+}
+
+void __attribute__ ((noipa))
+merge1 (vnx2uqi x, vnx2uqi y, vnx2uqi *out) /* uint8x2 variant of merge0 */
+{
+  vnx2uqi v = __builtin_shufflevector ((vnx2uqi) x, (vnx2uqi) y, MASK_2);
+  *(vnx2uqi *)out = v;
+}
+
+/* { dg-final { scan-assembler-times {\tvmerge.vvm} 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-1.c
new file mode 100644
index 00000000000..7449f63583c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-1.c
@@ -0,0 +1,119 @@ 
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-options "-O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "merge-1.c"
+
+int main(void) /* run-time check for merge-1.c: each result must have even lanes from x, odd lanes from y */
+{
+    vnx16qi vnx16qi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+    vnx16qi vnx16qi_y= {201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216};
+    vnx16qi vnx16qi_expect= {1,202,3,204,5,206,7,208,9,210,11,212,13,214,15,216};
+    vnx16qi vnx16qi_real;
+    merge0(vnx16qi_x,vnx16qi_y, &vnx16qi_real);
+    for(int i=0; i<16; i++)
+        if(vnx16qi_real[i]!=vnx16qi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx16uqi vnx16uqi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+    vnx16uqi vnx16uqi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116};
+    vnx16uqi vnx16uqi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116};
+    vnx16uqi vnx16uqi_real;
+    merge1(vnx16uqi_x,vnx16uqi_y, &vnx16uqi_real);
+    for(int i=0; i<16; i++)
+        if(vnx16uqi_real[i]!=vnx16uqi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx8hi vnx8hi_x= {1,2,3,4,5,6,7,8};
+    vnx8hi vnx8hi_y= {101,102,103,104,105,106,107,108};
+    vnx8hi vnx8hi_expect= {1,102,3,104,5,106,7,108};
+    vnx8hi vnx8hi_real;
+    merge2(vnx8hi_x,vnx8hi_y, &vnx8hi_real);
+    for(int i=0; i<8; i++)
+        if(vnx8hi_real[i]!=vnx8hi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx8uhi vnx8uhi_x= {1,2,3,4,5,6,7,8};
+    vnx8uhi vnx8uhi_y= {101,102,103,104,105,106,107,108};
+    vnx8uhi vnx8uhi_expect= {1,102,3,104,5,106,7,108};
+    vnx8uhi vnx8uhi_real;
+    merge3(vnx8uhi_x,vnx8uhi_y, &vnx8uhi_real);
+    for(int i=0; i<8; i++)
+        if(vnx8uhi_real[i]!=vnx8uhi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx4si vnx4si_x= {1,2,3,4};
+    vnx4si vnx4si_y= {101,102,103,104};
+    vnx4si vnx4si_expect= {1,102,3,104};
+    vnx4si vnx4si_real;
+    merge4(vnx4si_x,vnx4si_y,&vnx4si_real);
+    for(int i=0; i<4; i++)
+        if(vnx4si_real[i]!=vnx4si_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx4usi vnx4usi_x= {1,2,3,4};
+    vnx4usi vnx4usi_y= {101,102,103,104};
+    vnx4usi vnx4usi_expect= {1,102,3,104};
+    vnx4usi vnx4usi_real;
+    merge5(vnx4usi_x,vnx4usi_y,&vnx4usi_real);
+    for(int i=0; i<4; i++)
+        if(vnx4usi_real[i]!=vnx4usi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx2di vnx2di_x= {1,2};
+    vnx2di vnx2di_y= {101,102};
+    vnx2di vnx2di_expect= {1,102};
+    vnx2di vnx2di_real;
+    merge6(vnx2di_x,vnx2di_y,&vnx2di_real);
+    for(int i=0; i<2; i++)
+        if(vnx2di_real[i]!=vnx2di_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx2udi vnx2udi_x= {1,2};
+    vnx2udi vnx2udi_y= {101,102};
+    vnx2udi vnx2udi_expect= {1,102};
+    vnx2udi vnx2udi_real;
+    merge7(vnx2udi_x,vnx2udi_y,&vnx2udi_real);
+    for(int i=0; i<2; i++)
+        if(vnx2udi_real[i]!=vnx2udi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx8hf vnx8hf_x= {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0};
+    vnx8hf vnx8hf_y= {1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1};
+    vnx8hf vnx8hf_expect= {1.0,2.1,3.0,4.1,5.0,6.1,7.0,8.1};
+    vnx8hf vnx8hf_real;
+    merge8(vnx8hf_x,vnx8hf_y,&vnx8hf_real);
+    for(int i=0; i<8; i++)
+        if(vnx8hf_real[i]!=vnx8hf_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx4sf vnx4sf_x= {1.0,2.0,3.0,4.0};
+    vnx4sf vnx4sf_y= {1.1,2.1,3.1,4.1};
+    vnx4sf vnx4sf_expect= {1.0,2.1,3.0,4.1};
+    vnx4sf vnx4sf_real;
+    merge9(vnx4sf_x,vnx4sf_y,&vnx4sf_real);
+    for(int i=0; i<4; i++)
+        if(vnx4sf_real[i]!=vnx4sf_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx2df vnx2df_x= {1.0,2.0};
+    vnx2df vnx2df_y= {1.1,2.1};
+    vnx2df vnx2df_expect= {1.0,2.1};
+    vnx2df vnx2df_real;
+    merge10(vnx2df_x,vnx2df_y,&vnx2df_real);
+    for(int i=0; i<2; i++)
+        if(vnx2df_real[i]!=vnx2df_expect[i]) {
+            __builtin_abort();
+        }
+
+    return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-2.c
new file mode 100644
index 00000000000..248a30433a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-2.c
@@ -0,0 +1,121 @@ 
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-options "-O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "merge-2.c"
+
+int main(void) /* run-time check for merge-2.c: each result must have even lanes from x, odd lanes from y */
+{
+    vnx32qi vnx32qi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32};
+    vnx32qi vnx32qi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132};
+    vnx32qi vnx32qi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132};
+    vnx32qi vnx32qi_real;
+    merge0(vnx32qi_x,vnx32qi_y,&vnx32qi_real);
+    for(int i=0; i<32; i++)
+        if(vnx32qi_real[i]!=vnx32qi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx32uqi vnx32uqi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32};
+    vnx32uqi vnx32uqi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132};
+    vnx32uqi vnx32uqi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132};
+    vnx32uqi vnx32uqi_real;
+    merge1(vnx32uqi_x,vnx32uqi_y,&vnx32uqi_real);
+    for(int i=0; i<32; i++)
+        if(vnx32uqi_real[i]!=vnx32uqi_expect[i]) {
+            __builtin_abort();
+        }
+
+
+    vnx16hi vnx16hi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+    vnx16hi vnx16hi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116};
+    vnx16hi vnx16hi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116};
+    vnx16hi vnx16hi_real;
+    merge2(vnx16hi_x,vnx16hi_y,&vnx16hi_real);
+    for(int i=0; i<16; i++)
+        if(vnx16hi_real[i]!=vnx16hi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx16uhi vnx16uhi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+    vnx16uhi vnx16uhi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116};
+    vnx16uhi vnx16uhi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116};
+    vnx16uhi vnx16uhi_real;
+    merge3(vnx16uhi_x,vnx16uhi_y,&vnx16uhi_real);
+    for(int i=0; i<16; i++)
+        if(vnx16uhi_real[i]!=vnx16uhi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx8si vnx8si_x= {1,2,3,4,5,6,7,8};
+    vnx8si vnx8si_y= {101,102,103,104,105,106,107,108};
+    vnx8si vnx8si_expect= {1,102,3,104,5,106,7,108};
+    vnx8si vnx8si_real;
+    merge4(vnx8si_x,vnx8si_y,&vnx8si_real);
+    for(int i=0; i<8; i++)
+        if(vnx8si_real[i]!=vnx8si_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx8usi vnx8usi_x= {1,2,3,4,5,6,7,8};
+    vnx8usi vnx8usi_y= {101,102,103,104,105,106,107,108};
+    vnx8usi vnx8usi_expect= {1,102,3,104,5,106,7,108};
+    vnx8usi vnx8usi_real;
+    merge5(vnx8usi_x,vnx8usi_y,&vnx8usi_real);
+    for(int i=0; i<8; i++)
+        if(vnx8usi_real[i]!=vnx8usi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx4di vnx4di_x= {1,2,3,4};
+    vnx4di vnx4di_y= {101,102,103,104};
+    vnx4di vnx4di_expect= {1,102,3,104};
+    vnx4di vnx4di_real;
+    merge6(vnx4di_x,vnx4di_y,&vnx4di_real);
+    for(int i=0; i<4; i++)
+        if(vnx4di_real[i]!=vnx4di_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx4udi vnx4udi_x= {1,2,3,4};
+    vnx4udi vnx4udi_y= {101,102,103,104};
+    vnx4udi vnx4udi_expect= {1,102,3,104};
+    vnx4udi vnx4udi_real;
+    merge7(vnx4udi_x,vnx4udi_y,&vnx4udi_real);
+    for(int i=0; i<4; i++)
+        if(vnx4udi_real[i]!=vnx4udi_expect[i]) {
+            __builtin_abort();
+        }
+
+
+    vnx16hf vnx16hf_x= {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0};
+    vnx16hf vnx16hf_y= {1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1,9.1,10.1,11.1,12.1,13.1,14.1,15.1,16.1};
+    vnx16hf vnx16hf_expect= {1.0,2.1,3.0,4.1,5.0,6.1,7.0,8.1,9.0,10.1,11.0,12.1,13.0,14.1,15.0,16.1};
+    vnx16hf vnx16hf_real;
+    merge8(vnx16hf_x,vnx16hf_y,&vnx16hf_real);
+    for(int i=0; i<16; i++) /* was i<8: vnx16hf has 16 lanes, upper half went unchecked */
+        if(vnx16hf_real[i]!=vnx16hf_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx8sf vnx8sf_x= {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0};
+    vnx8sf vnx8sf_y= {1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1};
+    vnx8sf vnx8sf_expect= {1.0,2.1,3.0,4.1,5.0,6.1,7.0,8.1};
+    vnx8sf vnx8sf_real;
+    merge9(vnx8sf_x,vnx8sf_y,&vnx8sf_real);
+    for(int i=0; i<8; i++) /* was i<4: vnx8sf has 8 lanes, upper half went unchecked */
+        if(vnx8sf_real[i]!=vnx8sf_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx4df vnx4df_x= {1.0,2.0,3.0,4.0};
+    vnx4df vnx4df_y= {1.1,2.1,3.1,4.1};
+    vnx4df vnx4df_expect= {1.0,2.1,3.0,4.1};
+    vnx4df vnx4df_real;
+    merge10(vnx4df_x,vnx4df_y,&vnx4df_real);
+    for(int i=0; i<4; i++) /* was i<2: vnx4df has 4 lanes, upper half went unchecked */
+        if(vnx4df_real[i]!=vnx4df_expect[i]) {
+            __builtin_abort();
+        }
+
+    return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-3.c
new file mode 100644
index 00000000000..a587dd45eb1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-3.c
@@ -0,0 +1,150 @@ 
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-options "-O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "merge-3.c"
+
+int main(void) /* run-time check for merge-3.c: each result must have even lanes from x, odd lanes from y */
+{
+    vnx64qi vnx64qi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,\
+                        17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,\
+                        33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,\
+                        49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64
+                       };
+    vnx64qi vnx64qi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,\
+                        117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,\
+                        133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,\
+                        149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164
+                       };
+    vnx64qi vnx64qi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,\
+                             17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132,\
+                             33,134,35,136,37,138,39,140,41,142,43,144,45,146,47,148,\
+                             49,150,51,152,53,154,55,156,57,158,59,160,61,162,63,164,
+                            };
+    vnx64qi vnx64qi_real;
+    merge0(vnx64qi_x,vnx64qi_y,&vnx64qi_real);
+    for(int i=0; i<64; i++)
+        if(vnx64qi_real[i]!=vnx64qi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx64uqi vnx64uqi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,\
+                          17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,\
+                          33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,\
+                          49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64
+                         };
+    vnx64uqi vnx64uqi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,\
+                          117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,\
+                          133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,\
+                          149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164
+                         };
+    vnx64uqi vnx64uqi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,\
+                               17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132,\
+                               33,134,35,136,37,138,39,140,41,142,43,144,45,146,47,148,\
+                               49,150,51,152,53,154,55,156,57,158,59,160,61,162,63,164,
+                              };
+    vnx64uqi vnx64uqi_real;
+    merge1(vnx64uqi_x,vnx64uqi_y,&vnx64uqi_real);
+    for(int i=0; i<64; i++)
+        if(vnx64uqi_real[i]!=vnx64uqi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx32hi vnx32hi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32};
+    vnx32hi vnx32hi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132};
+    vnx32hi vnx32hi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132};
+    vnx32hi vnx32hi_real;
+    merge2(vnx32hi_x,vnx32hi_y,&vnx32hi_real);
+    for(int i=0; i<32; i++)
+        if(vnx32hi_real[i]!=vnx32hi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx32uhi vnx32uhi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32};
+    vnx32uhi vnx32uhi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132};
+    vnx32uhi vnx32uhi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132};
+    vnx32uhi vnx32uhi_real;
+    merge3(vnx32uhi_x,vnx32uhi_y,&vnx32uhi_real);
+    for(int i=0; i<32; i++)
+        if(vnx32uhi_real[i]!=vnx32uhi_expect[i]) {
+            __builtin_abort();
+        }
+
+
+    vnx16si vnx16si_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+    vnx16si vnx16si_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116};
+    vnx16si vnx16si_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116};
+    vnx16si vnx16si_real;
+    merge4(vnx16si_x,vnx16si_y,&vnx16si_real);
+    for(int i=0; i<16; i++)
+        if(vnx16si_real[i]!=vnx16si_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx16usi vnx16usi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+    vnx16usi vnx16usi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116};
+    vnx16usi vnx16usi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116};
+    vnx16usi vnx16usi_real;
+    merge5(vnx16usi_x,vnx16usi_y,&vnx16usi_real);
+    for(int i=0; i<16; i++)
+        if(vnx16usi_real[i]!=vnx16usi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx8di vnx8di_x= {1,2,3,4,5,6,7,8};
+    vnx8di vnx8di_y= {101,102,103,104,105,106,107,108};
+    vnx8di vnx8di_expect= {1,102,3,104,5,106,7,108};
+    vnx8di vnx8di_real;
+    merge6(vnx8di_x,vnx8di_y,&vnx8di_real);
+    for(int i=0; i<8; i++)
+        if(vnx8di_real[i]!=vnx8di_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx8udi vnx8udi_x= {1,2,3,4,5,6,7,8};
+    vnx8udi vnx8udi_y= {101,102,103,104,105,106,107,108};
+    vnx8udi vnx8udi_expect= {1,102,3,104,5,106,7,108};
+    vnx8udi vnx8udi_real;
+    merge7(vnx8udi_x,vnx8udi_y,&vnx8udi_real);
+    for(int i=0; i<8; i++)
+        if(vnx8udi_real[i]!=vnx8udi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx32hf vnx32hf_x= {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0,\
+                        17.0,18.0,19.0,20.0,21.0,22.0,23.0,24.0,25.0,26.0,27.0,28.0,29.0,30.0,31.0,32.0
+                       };
+    vnx32hf vnx32hf_y= {1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1,9.1,10.1,11.1,12.1,13.1,14.1,15.1,16.1,\
+                        17.1,18.1,19.1,20.1,21.1,22.1,23.1,24.1,25.1,26.1,27.1,28.1,29.1,30.1,31.1,32.1
+                       };
+    vnx32hf vnx32hf_expect= {1.0,2.1,3.0,4.1,5.0,6.1,7.0,8.1,9.0,10.1,11.0,12.1,13.0,14.1,15.0,16.1,\
+                             17.0,18.1,19.0,20.1,21.0,22.1,23.0,24.1,25.0,26.1,27.0,28.1,29.0,30.1,31.0,32.1
+                            };
+    vnx32hf vnx32hf_real;
+    merge8(vnx32hf_x,vnx32hf_y,&vnx32hf_real);
+    for(int i=0; i<32; i++)
+        if(vnx32hf_real[i]!=vnx32hf_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx16sf vnx16sf_x= {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0};
+    vnx16sf vnx16sf_y= {1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1,9.1,10.1,11.1,12.1,13.1,14.1,15.1,16.1};
+    vnx16sf vnx16sf_expect= {1.0,2.1,3.0,4.1,5.0,6.1,7.0,8.1,9.0,10.1,11.0,12.1,13.0,14.1,15.0,16.1};
+    vnx16sf vnx16sf_real;
+    merge9(vnx16sf_x,vnx16sf_y,&vnx16sf_real);
+    for(int i=0; i<16; i++)
+        if(vnx16sf_real[i]!=vnx16sf_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx8df vnx8df_x= {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0};
+    vnx8df vnx8df_y= {1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1};
+    vnx8df vnx8df_expect= {1.0,2.1,3.0,4.1,5.0,6.1,7.0,8.1};
+    vnx8df vnx8df_real;
+    merge10(vnx8df_x,vnx8df_y,&vnx8df_real);
+    for(int i=0; i<8; i++)
+        if(vnx8df_real[i]!=vnx8df_expect[i]) {
+            __builtin_abort();
+        }
+
+    return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-4.c
new file mode 100644
index 00000000000..18dedb0f77d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-4.c
@@ -0,0 +1,210 @@ 
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-options "-O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "merge-4.c"
+
+int main(void) /* run-time check for merge-4.c: each result must have even lanes from x, odd lanes from y */
+{
+    vnx128qi vnx128qi_x= {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,\
+                          16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,\
+                          32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,\
+                          48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,\
+                          64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,\
+                          80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,\
+                          96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,\
+                          112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127
+                         };
+    vnx128qi vnx128qi_y= {128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,\
+                          144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,\
+                          160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,\
+                          176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,\
+                          192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,\
+                          208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,\
+                          224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,\
+                          240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
+                         };
+    vnx128qi vnx128qi_expect= {0,129,2,131,4,133,6,135,8,137,10,139,12,141,14,143,\
+                               16,145,18,147,20,149,22,151,24,153,26,155,28,157,30,159,\
+                               32,161,34,163,36,165,38,167,40,169,42,171,44,173,46,175,\
+                               48,177,50,179,52,181,54,183,56,185,58,187,60,189,62,191,\
+                               64,193,66,195,68,197,70,199,72,201,74,203,76,205,78,207,\
+                               80,209,82,211,84,213,86,215,88,217,90,219,92,221,94,223,\
+                               96,225,98,227,100,229,102,231,104,233,106,235,108,237,110,239,\
+                               112,241,114,243,116,245,118,247,120,249,122,251,124,253,126,255
+                              };
+    vnx128qi vnx128qi_real;
+    merge0(vnx128qi_x,vnx128qi_y,&vnx128qi_real);
+    for(int i=0; i<128; i++)
+        if(vnx128qi_real[i]!=vnx128qi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx128uqi vnx128uqi_x= {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,\
+                            16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,\
+                            32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,\
+                            48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,\
+                            64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,\
+                            80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,\
+                            96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,\
+                            112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127
+                           };
+    vnx128uqi vnx128uqi_y= {128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,\
+                            144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,\
+                            160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,\
+                            176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,\
+                            192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,\
+                            208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,\
+                            224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,\
+                            240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
+                           };
+    vnx128uqi vnx128uqi_expect= {0,129,2,131,4,133,6,135,8,137,10,139,12,141,14,143,\
+                                 16,145,18,147,20,149,22,151,24,153,26,155,28,157,30,159,\
+                                 32,161,34,163,36,165,38,167,40,169,42,171,44,173,46,175,\
+                                 48,177,50,179,52,181,54,183,56,185,58,187,60,189,62,191,\
+                                 64,193,66,195,68,197,70,199,72,201,74,203,76,205,78,207,\
+                                 80,209,82,211,84,213,86,215,88,217,90,219,92,221,94,223,\
+                                 96,225,98,227,100,229,102,231,104,233,106,235,108,237,110,239,\
+                                 112,241,114,243,116,245,118,247,120,249,122,251,124,253,126,255
+                                };
+    vnx128uqi vnx128uqi_real;
+    merge1(vnx128uqi_x,vnx128uqi_y,&vnx128uqi_real);
+    for(int i=0; i<128; i++)
+        if(vnx128uqi_real[i]!=vnx128uqi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx64hi vnx64hi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,\
+                        17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,\
+                        33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,\
+                        49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64
+                       };
+    vnx64hi vnx64hi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,\
+                        117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,\
+                        133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,\
+                        149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164
+                       };
+    vnx64hi vnx64hi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,\
+                             17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132,\
+                             33,134,35,136,37,138,39,140,41,142,43,144,45,146,47,148,\
+                             49,150,51,152,53,154,55,156,57,158,59,160,61,162,63,164,
+                            };
+    vnx64hi vnx64hi_real;
+    merge2(vnx64hi_x,vnx64hi_y,&vnx64hi_real);
+    for(int i=0; i<64; i++)
+        if(vnx64hi_real[i]!=vnx64hi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx64uhi vnx64uhi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,\
+                          17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,\
+                          33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,\
+                          49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64
+                         };
+    vnx64uhi vnx64uhi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,\
+                          117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,\
+                          133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,\
+                          149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164
+                         };
+    vnx64uhi vnx64uhi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,\
+                               17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132,\
+                               33,134,35,136,37,138,39,140,41,142,43,144,45,146,47,148,\
+                               49,150,51,152,53,154,55,156,57,158,59,160,61,162,63,164,
+                              };
+    vnx64uhi vnx64uhi_real;
+    merge3(vnx64uhi_x,vnx64uhi_y,&vnx64uhi_real);
+    for(int i=0; i<64; i++)
+        if(vnx64uhi_real[i]!=vnx64uhi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx32si vnx32si_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32};
+    vnx32si vnx32si_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132};
+    vnx32si vnx32si_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132};
+    vnx32si vnx32si_real;
+    merge4(vnx32si_x,vnx32si_y,&vnx32si_real);
+    for(int i=0; i<32; i++)
+        if(vnx32si_real[i]!=vnx32si_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx32usi vnx32usi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32};
+    vnx32usi vnx32usi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132};
+    vnx32usi vnx32usi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132};
+    vnx32usi vnx32usi_real;
+    merge5(vnx32usi_x,vnx32usi_y,&vnx32usi_real);
+    for(int i=0; i<32; i++)
+        if(vnx32usi_real[i]!=vnx32usi_expect[i]) {
+            __builtin_abort();
+        }
+
+
+    vnx16di vnx16di_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+    vnx16di vnx16di_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116};
+    vnx16di vnx16di_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116};
+    vnx16di vnx16di_real;
+    merge6(vnx16di_x,vnx16di_y,&vnx16di_real);
+    for(int i=0; i<16; i++)
+        if(vnx16di_real[i]!=vnx16di_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx16udi vnx16udi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+    vnx16udi vnx16udi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116};
+    vnx16udi vnx16udi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116};
+    vnx16udi vnx16udi_real;
+    merge7(vnx16udi_x,vnx16udi_y,&vnx16udi_real);
+    for(int i=0; i<16; i++)
+        if(vnx16udi_real[i]!=vnx16udi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx64hf vnx64hf_x= {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0,\
+                        17.0,18.0,19.0,20.0,21.0,22.0,23.0,24.0,25.0,26.0,27.0,28.0,29.0,30.0,31.0,32.0,\
+                        33.0,34.0,35.0,36.0,37.0,38.0,39.0,40.0,41.0,42.0,43.0,44.0,45.0,46.0,47.0,48.0,\
+                        49.0,50.0,51.0,52.0,53.0,54.0,55.0,56.0,57.0,58.0,59.0,60.0,61.0,62.0,63.0,64.0
+                       };
+    vnx64hf vnx64hf_y= {1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1,9.1,10.1,11.1,12.1,13.1,14.1,15.1,16.1,\
+                        17.1,18.1,19.1,20.1,21.1,22.1,23.1,24.1,25.1,26.1,27.1,28.1,29.1,30.1,31.1,32.1,\
+                        33.1,34.1,35.1,36.1,37.1,38.1,39.1,40.1,41.1,42.1,43.1,44.1,45.1,46.1,47.1,48.1,\
+                        49.1,50.1,51.1,52.1,53.1,54.1,55.1,56.1,57.1,58.1,59.1,60.1,61.1,62.1,63.1,64.1
+                       };
+    vnx64hf vnx64hf_expect= {1.0,2.1,3.0,4.1,5.0,6.1,7.0,8.1,9.0,10.1,11.0,12.1,13.0,14.1,15.0,16.1,\
+                             17.0,18.1,19.0,20.1,21.0,22.1,23.0,24.1,25.0,26.1,27.0,28.1,29.0,30.1,31.0,32.1,\
+                             33.0,34.1,35.0,36.1,37.0,38.1,39.0,40.1,41.0,42.1,43.0,44.1,45.0,46.1,47.0,48.1,\
+                             49.0,50.1,51.0,52.1,53.0,54.1,55.0,56.1,57.0,58.1,59.0,60.1,61.0,62.1,63.0,64.1
+                            };
+    vnx64hf vnx64hf_real;
+    merge8(vnx64hf_x,vnx64hf_y,&vnx64hf_real);
+    for(int i=0; i<64; i++)
+        if(vnx64hf_real[i]!=vnx64hf_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx32sf vnx32sf_x= {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0,\
+                        17.0,18.0,19.0,20.0,21.0,22.0,23.0,24.0,25.0,26.0,27.0,28.0,29.0,30.0,31.0,32.0
+                       };
+    vnx32sf vnx32sf_y= {1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1,9.1,10.1,11.1,12.1,13.1,14.1,15.1,16.1,\
+                        17.1,18.1,19.1,20.1,21.1,22.1,23.1,24.1,25.1,26.1,27.1,28.1,29.1,30.1,31.1,32.1
+                       };
+    vnx32sf vnx32sf_expect= {1.0,2.1,3.0,4.1,5.0,6.1,7.0,8.1,9.0,10.1,11.0,12.1,13.0,14.1,15.0,16.1,\
+                             17.0,18.1,19.0,20.1,21.0,22.1,23.0,24.1,25.0,26.1,27.0,28.1,29.0,30.1,31.0,32.1
+                            };
+    vnx32sf vnx32sf_real;
+    merge9(vnx32sf_x,vnx32sf_y,&vnx32sf_real);
+    for(int i=0; i<32; i++)
+        if(vnx32sf_real[i]!=vnx32sf_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx16df vnx16df_x= {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0};
+    vnx16df vnx16df_y= {1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1,9.1,10.1,11.1,12.1,13.1,14.1,15.1,16.1};
+    vnx16df vnx16df_expect= {1.0,2.1,3.0,4.1,5.0,6.1,7.0,8.1,9.0,10.1,11.0,12.1,13.0,14.1,15.0,16.1};
+    vnx16df vnx16df_real;
+    merge10(vnx16df_x,vnx16df_y,&vnx16df_real);
+    for(int i=0; i<16; i++)
+        if(vnx16df_real[i]!=vnx16df_expect[i]) {
+            __builtin_abort();
+        }
+
+    return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-5.c
new file mode 100644
index 00000000000..61dbd5b4f2b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-5.c
@@ -0,0 +1,89 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-options "-O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "merge-5.c"
+
+int main(void)
+{
+    vnx8qi vnx8qi_x= {1,2,3,4,5,6,7,8};
+    vnx8qi vnx8qi_y= {101,102,103,104,105,106,107,108};
+    vnx8qi vnx8qi_expect= {1,102,3,104,5,106,7,108};
+    vnx8qi vnx8qi_real;
+    merge0(vnx8qi_x,vnx8qi_y,&vnx8qi_real);
+    for(int i=0; i<8; i++)
+        if(vnx8qi_real[i]!=vnx8qi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx8uqi vnx8uqi_x= {1,2,3,4,5,6,7,8};
+    vnx8uqi vnx8uqi_y= {101,102,103,104,105,106,107,108};
+    vnx8uqi vnx8uqi_expect= {1,102,3,104,5,106,7,108};
+    vnx8uqi vnx8uqi_real;
+    merge1(vnx8uqi_x,vnx8uqi_y,&vnx8uqi_real);
+    for(int i=0; i<8; i++)
+        if(vnx8uqi_real[i]!=vnx8uqi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx4hi vnx4hi_x= {1,2,3,4};
+    vnx4hi vnx4hi_y= {101,102,103,104};
+    vnx4hi vnx4hi_expect= {1,102,3,104};
+    vnx4hi vnx4hi_real;
+    merge2(vnx4hi_x,vnx4hi_y,&vnx4hi_real);
+    for(int i=0; i<4; i++)
+        if(vnx4hi_real[i]!=vnx4hi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx4uhi vnx4uhi_x= {1,2,3,4};
+    vnx4uhi vnx4uhi_y= {101,102,103,104};
+    vnx4uhi vnx4uhi_expect= {1,102,3,104};
+    vnx4uhi vnx4uhi_real;
+    merge3(vnx4uhi_x,vnx4uhi_y,&vnx4uhi_real);
+    for(int i=0; i<4; i++)
+        if(vnx4uhi_real[i]!=vnx4uhi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx2si vnx2si_x= {1,2};
+    vnx2si vnx2si_y= {101,102};
+    vnx2si vnx2si_expect= {1,102};
+    vnx2si vnx2si_real;
+    merge4(vnx2si_x,vnx2si_y,&vnx2si_real);
+    for(int i=0; i<2; i++)
+        if(vnx2si_real[i]!=vnx2si_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx2usi vnx2usi_x= {1,2};
+    vnx2usi vnx2usi_y= {101,102};
+    vnx2usi vnx2usi_expect= {1,102};
+    vnx2usi vnx2usi_real;
+    merge5(vnx2usi_x,vnx2usi_y,&vnx2usi_real);
+    for(int i=0; i<2; i++)
+        if(vnx2usi_real[i]!=vnx2usi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx4hf vnx4hf_x= {1.0,2.0,3.0,4.0};
+    vnx4hf vnx4hf_y= {1.1,2.1,3.1,4.1};
+    vnx4hf vnx4hf_expect= {1.0,2.1,3.0,4.1};
+    vnx4hf vnx4hf_real;
+    merge6(vnx4hf_x,vnx4hf_y,&vnx4hf_real);
+    for(int i=0; i<4; i++)
+        if(vnx4hf_real[i]!=vnx4hf_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx2sf vnx2sf_x= {1.0,2.0};
+    vnx2sf vnx2sf_y= {1.1,2.1};
+    vnx2sf vnx2sf_expect= {1.0,2.1};
+    vnx2sf vnx2sf_real;
+    merge7(vnx2sf_x,vnx2sf_y,&vnx2sf_real);
+    for(int i=0; i<2; i++)
+        if(vnx2sf_real[i]!=vnx2sf_expect[i]) {
+            __builtin_abort();
+        }
+
+    return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-6.c
new file mode 100644
index 00000000000..da7c462e0c3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-6.c
@@ -0,0 +1,59 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-options "-O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "merge-6.c"
+
+int main(void)
+{
+    vnx4qi vnx4qi_x= {1,2,3,4};
+    vnx4qi vnx4qi_y= {101,102,103,104};
+    vnx4qi vnx4qi_expect= {1,102,3,104};
+    vnx4qi vnx4qi_real;
+    merge0(vnx4qi_x,vnx4qi_y,&vnx4qi_real);
+    for(int i=0; i<4; i++)
+        if(vnx4qi_real[i]!=vnx4qi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx4uqi vnx4uqi_x= {1,2,3,4};
+    vnx4uqi vnx4uqi_y= {101,102,103,104};
+    vnx4uqi vnx4uqi_expect= {1,102,3,104};
+    vnx4uqi vnx4uqi_real;
+    merge1(vnx4uqi_x,vnx4uqi_y,&vnx4uqi_real);
+    for(int i=0; i<4; i++)
+        if(vnx4uqi_real[i]!=vnx4uqi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx2hi vnx2hi_x= {1,2};
+    vnx2hi vnx2hi_y= {101,102};
+    vnx2hi vnx2hi_expect= {1,102};
+    vnx2hi vnx2hi_real;
+    merge2(vnx2hi_x,vnx2hi_y,&vnx2hi_real);
+    for(int i=0; i<2; i++)
+        if(vnx2hi_real[i]!=vnx2hi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx2uhi vnx2uhi_x= {1,2};
+    vnx2uhi vnx2uhi_y= {101,102};
+    vnx2uhi vnx2uhi_expect= {1,102};
+    vnx2uhi vnx2uhi_real;
+    merge3(vnx2uhi_x,vnx2uhi_y,&vnx2uhi_real);
+    for(int i=0; i<2; i++)
+        if(vnx2uhi_real[i]!=vnx2uhi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx2hf vnx2hf_x= {1.0,2.0};
+    vnx2hf vnx2hf_y= {1.1,2.1};
+    vnx2hf vnx2hf_expect= {1.0,2.1};
+    vnx2hf vnx2hf_real;
+    merge6(vnx2hf_x,vnx2hf_y,&vnx2hf_real);
+    for(int i=0; i<2; i++)
+        if(vnx2hf_real[i]!=vnx2hf_expect[i]) {
+            __builtin_abort();
+        }
+
+    return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-7.c
new file mode 100644
index 00000000000..7aaa6b37d52
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-7.c
@@ -0,0 +1,29 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-options "-O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "merge-7.c"
+
+int main(void)
+{
+    vnx2qi vnx2qi_x= {1,2};
+    vnx2qi vnx2qi_y= {101,102};
+    vnx2qi vnx2qi_expect= {1,102};
+    vnx2qi vnx2qi_real;
+    merge0(vnx2qi_x,vnx2qi_y,&vnx2qi_real);
+    for(int i=0; i<2; i++)
+        if(vnx2qi_real[i]!=vnx2qi_expect[i]) {
+            __builtin_abort();
+        }
+
+    vnx2uqi vnx2uqi_x= {1,2};
+    vnx2uqi vnx2uqi_y= {101,102};
+    vnx2uqi vnx2uqi_expect= {1,102};
+    vnx2uqi vnx2uqi_real;
+    merge1(vnx2uqi_x,vnx2uqi_y,&vnx2uqi_real);
+    for(int i=0; i<2; i++)
+        if(vnx2uqi_real[i]!=vnx2uqi_expect[i]) {
+            __builtin_abort();
+        }
+
+    return 0;
+}