[v2] RISC-V: Use merge approach to optimize vector permutation
Checks
Commit Message
From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
This patch is to optimize the permutation case that is suitable to use the
merge approach.
Consider this following case:
typedef int8_t vnx16qi __attribute__((vector_size (16)));
void __attribute__ ((noipa))
merge0 (vnx16qi x, vnx16qi y, vnx16qi *out)
{
vnx16qi v = __builtin_shufflevector ((vnx16qi) x, (vnx16qi) y, MASK_16);
*(vnx16qi*)out = v;
}
The gimple IR:
v_3 = VEC_PERM_EXPR <x_1(D), y_2(D), { 0, 17, 2, 19, 4, 21, 6, 23, 8, 9, 10, 27, 12, 29, 14, 31 }>;
Selector = { 0, 17, 2, 19, 4, 21, 6, 23, 8, 9, 10, 27, 12, 29, 14, 31 }, the common expression:
{ 0, nunits + 1, 2, nunits + 3, 4, nunits + 5, ... }
For this selector, we can use vmsltu + vmerge to optimize the codegen.
Before this patch:
merge0:
addi a5,sp,16
vl1re8.v v3,0(a5)
li a5,31
vsetivli zero,16,e8,m1,ta,mu
vmv.v.x v2,a5
lui a5,%hi(.LANCHOR0)
addi a5,a5,%lo(.LANCHOR0)
vl1re8.v v1,0(a5)
vl1re8.v v4,0(sp)
vand.vv v1,v1,v2
vmsgeu.vi v0,v1,16
vrgather.vv v2,v4,v1
vadd.vi v1,v1,-16
vrgather.vv v2,v3,v1,v0.t
vs1r.v v2,0(a0)
ret
After this patch:
merge0:
addi a5,sp,16
vl1re8.v v1,0(a5)
lui a5,%hi(.LANCHOR0)
addi a5,a5,%lo(.LANCHOR0)
vsetivli zero,16,e8,m1,ta,ma
vl1re8.v v0,0(a5)
vl1re8.v v2,0(sp)
vmsltu.vi v0,v0,16
vmerge.vvm v1,v1,v2,v0
vs1r.v v1,0(a0)
ret
The key of this optimization is that:
1. mask = vmsltu (selector, nunits)
2. result = vmerge (op0, op1, mask)
gcc/ChangeLog:
* config/riscv/riscv-v.cc (shuffle_merge_patterns): New pattern.
(expand_vec_perm_const_1): Add merge optimization.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-1.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-3.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-4.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-5.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-6.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-7.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-1.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-3.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-4.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-5.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-6.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-7.c: New test.
---
gcc/config/riscv/riscv-v.cc | 52 +++++
.../riscv/rvv/autovec/vls-vlmax/merge-1.c | 101 +++++++++
.../riscv/rvv/autovec/vls-vlmax/merge-2.c | 103 +++++++++
.../riscv/rvv/autovec/vls-vlmax/merge-3.c | 109 +++++++++
.../riscv/rvv/autovec/vls-vlmax/merge-4.c | 122 ++++++++++
.../riscv/rvv/autovec/vls-vlmax/merge-5.c | 76 +++++++
.../riscv/rvv/autovec/vls-vlmax/merge-6.c | 51 +++++
.../riscv/rvv/autovec/vls-vlmax/merge-7.c | 25 +++
.../riscv/rvv/autovec/vls-vlmax/merge_run-1.c | 119 ++++++++++
.../riscv/rvv/autovec/vls-vlmax/merge_run-2.c | 121 ++++++++++
.../riscv/rvv/autovec/vls-vlmax/merge_run-3.c | 150 +++++++++++++
.../riscv/rvv/autovec/vls-vlmax/merge_run-4.c | 210 ++++++++++++++++++
.../riscv/rvv/autovec/vls-vlmax/merge_run-5.c | 89 ++++++++
.../riscv/rvv/autovec/vls-vlmax/merge_run-6.c | 59 +++++
.../riscv/rvv/autovec/vls-vlmax/merge_run-7.c | 29 +++
15 files changed, 1416 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-5.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-6.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-7.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-5.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-6.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-7.c
Comments
+ for (int i = n_patterns; i < n_patterns * 2; i++)
+ if (!d->perm.series_p (i, n_patterns, i, n_patterns)
+ && !d->perm.series_p (i, n_patterns, vec_len + i, n_patterns))
+ return false;
As Robin's suggested, Add comment here:
/* Check the pattern is monotonic here, otherwise, return false. */
Send V3 with adding more comments and merge thanks.
juzhe.zhong@rivai.ai
From: pan2.li
Date: 2023-06-15 09:52
To: gcc-patches
CC: juzhe.zhong; palmer; rdapp.gcc; jeffreyalaw; kito.cheng
Subject: [PATCH v2] RISC-V: Use merge approach to optimize vector permutation
From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
This patch is to optimize the permutation case that is suitable to use the
merge approach.
Consider this following case:
typedef int8_t vnx16qi __attribute__((vector_size (16)));
void __attribute__ ((noipa))
merge0 (vnx16qi x, vnx16qi y, vnx16qi *out)
{
vnx16qi v = __builtin_shufflevector ((vnx16qi) x, (vnx16qi) y, MASK_16);
*(vnx16qi*)out = v;
}
The gimple IR:
v_3 = VEC_PERM_EXPR <x_1(D), y_2(D), { 0, 17, 2, 19, 4, 21, 6, 23, 8, 9, 10, 27, 12, 29, 14, 31 }>;
Selector = { 0, 17, 2, 19, 4, 21, 6, 23, 8, 9, 10, 27, 12, 29, 14, 31 }, the common expression:
{ 0, nunits + 1, 2, nunits + 3, 4, nunits + 5, ... }
For this selector, we can use vmsltu + vmerge to optimize the codegen.
Before this patch:
merge0:
addi a5,sp,16
vl1re8.v v3,0(a5)
li a5,31
vsetivli zero,16,e8,m1,ta,mu
vmv.v.x v2,a5
lui a5,%hi(.LANCHOR0)
addi a5,a5,%lo(.LANCHOR0)
vl1re8.v v1,0(a5)
vl1re8.v v4,0(sp)
vand.vv v1,v1,v2
vmsgeu.vi v0,v1,16
vrgather.vv v2,v4,v1
vadd.vi v1,v1,-16
vrgather.vv v2,v3,v1,v0.t
vs1r.v v2,0(a0)
ret
After this patch:
merge0:
addi a5,sp,16
vl1re8.v v1,0(a5)
lui a5,%hi(.LANCHOR0)
addi a5,a5,%lo(.LANCHOR0)
vsetivli zero,16,e8,m1,ta,ma
vl1re8.v v0,0(a5)
vl1re8.v v2,0(sp)
vmsltu.vi v0,v0,16
vmerge.vvm v1,v1,v2,v0
vs1r.v v1,0(a0)
ret
The key of this optimization is that:
1. mask = vmsltu (selector, nunits)
2. result = vmerge (op0, op1, mask)
gcc/ChangeLog:
* config/riscv/riscv-v.cc (shuffle_merge_patterns): New pattern.
(expand_vec_perm_const_1): Add merge optimization.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-1.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-3.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-4.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-5.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-6.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-7.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-1.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-3.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-4.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-5.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-6.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-7.c: New test.
---
gcc/config/riscv/riscv-v.cc | 52 +++++
.../riscv/rvv/autovec/vls-vlmax/merge-1.c | 101 +++++++++
.../riscv/rvv/autovec/vls-vlmax/merge-2.c | 103 +++++++++
.../riscv/rvv/autovec/vls-vlmax/merge-3.c | 109 +++++++++
.../riscv/rvv/autovec/vls-vlmax/merge-4.c | 122 ++++++++++
.../riscv/rvv/autovec/vls-vlmax/merge-5.c | 76 +++++++
.../riscv/rvv/autovec/vls-vlmax/merge-6.c | 51 +++++
.../riscv/rvv/autovec/vls-vlmax/merge-7.c | 25 +++
.../riscv/rvv/autovec/vls-vlmax/merge_run-1.c | 119 ++++++++++
.../riscv/rvv/autovec/vls-vlmax/merge_run-2.c | 121 ++++++++++
.../riscv/rvv/autovec/vls-vlmax/merge_run-3.c | 150 +++++++++++++
.../riscv/rvv/autovec/vls-vlmax/merge_run-4.c | 210 ++++++++++++++++++
.../riscv/rvv/autovec/vls-vlmax/merge_run-5.c | 89 ++++++++
.../riscv/rvv/autovec/vls-vlmax/merge_run-6.c | 59 +++++
.../riscv/rvv/autovec/vls-vlmax/merge_run-7.c | 29 +++
15 files changed, 1416 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-5.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-6.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-7.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-5.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-6.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-7.c
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 01f647bc0bd..079ca68f3e6 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2390,6 +2390,56 @@ struct expand_vec_perm_d
bool testing_p;
};
+/* Recognize the patterns that we can use merge operation to shuffle the
+ vectors. The value of each element (index i) in selector can only be
+ either i or nunits + i. We will check the pattern is actually monotonic.
+
+ E.g.
+ v = VEC_PERM_EXPR (v0, v1, selector),
+ selector = { 0, nunits + 1, 2, nunits + 3, 4, nunits + 5, ... }
+
+ We can transform such pattern into:
+
+ v = vcond_mask (v0, v1, mask),
+ mask = { 0, 1, 0, 1, 0, 1, ... }. */
+
+static bool
+shuffle_merge_patterns (struct expand_vec_perm_d *d)
+{
+ machine_mode vmode = d->vmode;
+ machine_mode sel_mode = related_int_vector_mode (vmode).require ();
+ int n_patterns = d->perm.encoding ().npatterns ();
+ poly_int64 vec_len = d->perm.length ();
+
+ for (int i = 0; i < n_patterns; ++i)
+ if (!known_eq (d->perm[i], i) && !known_eq (d->perm[i], vec_len + i))
+ return false;
+
+ for (int i = n_patterns; i < n_patterns * 2; i++)
+ if (!d->perm.series_p (i, n_patterns, i, n_patterns)
+ && !d->perm.series_p (i, n_patterns, vec_len + i, n_patterns))
+ return false;
+
+ if (d->testing_p)
+ return true;
+
+ machine_mode mask_mode = get_mask_mode (vmode).require ();
+ rtx mask = gen_reg_rtx (mask_mode);
+
+ rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm);
+
+ /* MASK = SELECTOR < NUNITS ? 1 : 0. */
+ rtx x = gen_int_mode (vec_len, GET_MODE_INNER (sel_mode));
+ insn_code icode = code_for_pred_cmp_scalar (sel_mode);
+ rtx cmp = gen_rtx_fmt_ee (LTU, mask_mode, sel, x);
+ rtx ops[] = {mask, cmp, sel, x};
+ emit_vlmax_cmp_insn (icode, ops);
+
+ /* TARGET = MASK ? OP0 : OP1. */
+ emit_insn (gen_vcond_mask (vmode, vmode, d->target, d->op0, d->op1, mask));
+ return true;
+}
+
/* Recognize decompress patterns:
1. VEC_PERM_EXPR op0 and op1
@@ -2511,6 +2561,8 @@ expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
if (d->vmode == d->op_mode)
{
+ if (shuffle_merge_patterns (d))
+ return true;
if (shuffle_decompress_patterns (d))
return true;
if (shuffle_generic_patterns (d))
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-1.c
new file mode 100644
index 00000000000..efeb23e9719
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-1.c
@@ -0,0 +1,101 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfhmin -mabi=lp64d -O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include <stdint-gcc.h>
+
+typedef int8_t vnx16qi __attribute__((vector_size (16)));
+typedef int16_t vnx8hi __attribute__((vector_size (16)));
+typedef int32_t vnx4si __attribute__((vector_size (16)));
+typedef int64_t vnx2di __attribute__((vector_size (16)));
+typedef uint8_t vnx16uqi __attribute__((vector_size (16)));
+typedef uint16_t vnx8uhi __attribute__((vector_size (16)));
+typedef uint32_t vnx4usi __attribute__((vector_size (16)));
+typedef uint64_t vnx2udi __attribute__((vector_size (16)));
+
+typedef _Float16 vnx8hf __attribute__((vector_size (16)));
+typedef float vnx4sf __attribute__((vector_size (16)));
+typedef double vnx2df __attribute__((vector_size (16)));
+
+#define MASK_16 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31
+#define MASK_8 0, 9, 2, 11, 4, 13, 6, 15
+#define MASK_4 0, 5, 2, 7
+#define MASK_2 0, 3
+
+void __attribute__ ((noipa))
+merge0 (vnx16qi x, vnx16qi y, vnx16qi *out)
+{
+ vnx16qi v = __builtin_shufflevector ((vnx16qi) x, (vnx16qi) y, MASK_16);
+ *(vnx16qi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge1 (vnx16uqi x, vnx16uqi y, vnx16uqi *out)
+{
+ vnx16uqi v = __builtin_shufflevector ((vnx16uqi) x, (vnx16uqi) y, MASK_16);
+ *(vnx16uqi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge2 (vnx8hi x, vnx8hi y, vnx8hi *out)
+{
+ vnx8hi v = __builtin_shufflevector ((vnx8hi) x, (vnx8hi) y, MASK_8);
+ *(vnx8hi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge3 (vnx8uhi x, vnx8uhi y, vnx8uhi *out)
+{
+ vnx8uhi v = __builtin_shufflevector ((vnx8uhi) x, (vnx8uhi) y, MASK_8);
+ *(vnx8uhi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge4 (vnx4si x, vnx4si y, vnx4si *out)
+{
+ vnx4si v = __builtin_shufflevector ((vnx4si) x, (vnx4si) y, MASK_4);
+ *(vnx4si*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge5 (vnx4usi x, vnx4usi y, vnx4usi *out)
+{
+ vnx4usi v = __builtin_shufflevector ((vnx4usi) x, (vnx4usi) y, MASK_4);
+ *(vnx4usi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge6 (vnx2di x, vnx2di y, vnx2di *out)
+{
+ vnx2di v = __builtin_shufflevector ((vnx2di) x, (vnx2di) y, MASK_2);
+ *(vnx2di*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge7 (vnx2udi x, vnx2udi y, vnx2udi *out)
+{
+ vnx2udi v = __builtin_shufflevector ((vnx2udi) x, (vnx2udi) y, MASK_2);
+ *(vnx2udi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge8 (vnx8hf x, vnx8hf y, vnx8hf *out)
+{
+ vnx8hf v = __builtin_shufflevector ((vnx8hf) x, (vnx8hf) y, MASK_8);
+ *(vnx8hf*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge9 (vnx4sf x, vnx4sf y, vnx4sf *out)
+{
+ vnx4sf v = __builtin_shufflevector ((vnx4sf) x, (vnx4sf) y, MASK_4);
+ *(vnx4sf*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge10 (vnx2df x, vnx2df y, vnx2df *out)
+{
+ vnx2df v = __builtin_shufflevector ((vnx2df) x, (vnx2df) y, MASK_2);
+ *(vnx2df*)out = v;
+}
+
+/* { dg-final { scan-assembler-times {\tvmerge.vvm} 11 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-2.c
new file mode 100644
index 00000000000..35b2aa8aee9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-2.c
@@ -0,0 +1,103 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfhmin -mabi=lp64d -O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include <stdint-gcc.h>
+
+typedef int8_t vnx32qi __attribute__((vector_size (32)));
+typedef int16_t vnx16hi __attribute__((vector_size (32)));
+typedef int32_t vnx8si __attribute__((vector_size (32)));
+typedef int64_t vnx4di __attribute__((vector_size (32)));
+typedef uint8_t vnx32uqi __attribute__((vector_size (32)));
+typedef uint16_t vnx16uhi __attribute__((vector_size (32)));
+typedef uint32_t vnx8usi __attribute__((vector_size (32)));
+typedef uint64_t vnx4udi __attribute__((vector_size (32)));
+
+typedef _Float16 vnx16hf __attribute__((vector_size (32)));
+typedef float vnx8sf __attribute__((vector_size (32)));
+typedef double vnx4df __attribute__((vector_size (32)));
+
+#define MASK_32 0, 33, 2, 35, 4, 37, 6, 39, 8, 41, \
+ 10, 43, 12, 45, 14, 47, 16, 49, 18, 51, \
+ 20, 53, 22, 55, 24, 57, 26, 59, 28, 61, 30, 63
+#define MASK_16 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31
+#define MASK_8 0, 9, 2, 11, 4, 13, 6, 15
+#define MASK_4 0, 5, 2, 7
+
+void __attribute__ ((noipa))
+merge0 (vnx32qi x, vnx32qi y, vnx32qi *out)
+{
+ vnx32qi v = __builtin_shufflevector ((vnx32qi) x, (vnx32qi) y, MASK_32);
+ *(vnx32qi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge1 (vnx32uqi x, vnx32uqi y, vnx32uqi *out)
+{
+ vnx32uqi v = __builtin_shufflevector ((vnx32uqi) x, (vnx32uqi) y, MASK_32);
+ *(vnx32uqi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge2 (vnx16hi x, vnx16hi y, vnx16hi *out)
+{
+ vnx16hi v = __builtin_shufflevector ((vnx16hi) x, (vnx16hi) y, MASK_16);
+ *(vnx16hi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge3 (vnx16uhi x, vnx16uhi y, vnx16uhi *out)
+{
+ vnx16uhi v = __builtin_shufflevector ((vnx16uhi) x, (vnx16uhi) y, MASK_16);
+ *(vnx16uhi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge4 (vnx8si x, vnx8si y, vnx8si *out)
+{
+ vnx8si v = __builtin_shufflevector ((vnx8si) x, (vnx8si) y, MASK_8);
+ *(vnx8si*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge5 (vnx8usi x, vnx8usi y, vnx8usi *out)
+{
+ vnx8usi v = __builtin_shufflevector ((vnx8usi) x, (vnx8usi) y, MASK_8);
+ *(vnx8usi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge6 (vnx4di x, vnx4di y, vnx4di *out)
+{
+ vnx4di v = __builtin_shufflevector ((vnx4di) x, (vnx4di) y, MASK_4);
+ *(vnx4di*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge7 (vnx4udi x, vnx4udi y, vnx4udi *out)
+{
+ vnx4udi v = __builtin_shufflevector ((vnx4udi) x, (vnx4udi) y, MASK_4);
+ *(vnx4udi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge8 (vnx16hf x, vnx16hf y, vnx16hf *out)
+{
+ vnx16hf v = __builtin_shufflevector ((vnx16hf) x, (vnx16hf) y, MASK_16);
+ *(vnx16hf*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge9 (vnx8sf x, vnx8sf y, vnx8sf *out)
+{
+ vnx8sf v = __builtin_shufflevector ((vnx8sf) x, (vnx8sf) y, MASK_8);
+ *(vnx8sf*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge10 (vnx4df x, vnx4df y, vnx4df *out)
+{
+ vnx4df v = __builtin_shufflevector ((vnx4df) x, (vnx4df) y, MASK_4);
+ *(vnx4df*)out = v;
+}
+
+/* { dg-final { scan-assembler-times {\tvmerge.vvm} 11 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-3.c
new file mode 100644
index 00000000000..957d5b26fdc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-3.c
@@ -0,0 +1,109 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfhmin -mabi=lp64d -O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include <stdint-gcc.h>
+
+typedef int8_t vnx64qi __attribute__((vector_size (64)));
+typedef int16_t vnx32hi __attribute__((vector_size (64)));
+typedef int32_t vnx16si __attribute__((vector_size (64)));
+typedef int64_t vnx8di __attribute__((vector_size (64)));
+typedef uint8_t vnx64uqi __attribute__((vector_size (64)));
+typedef uint16_t vnx32uhi __attribute__((vector_size (64)));
+typedef uint32_t vnx16usi __attribute__((vector_size (64)));
+typedef uint64_t vnx8udi __attribute__((vector_size (64)));
+
+typedef _Float16 vnx32hf __attribute__((vector_size (64)));
+typedef float vnx16sf __attribute__((vector_size (64)));
+typedef double vnx8df __attribute__((vector_size (64)));
+
+#define MASK_64 0, 65, 2, 67, 4, 69, 6, 71, 8, 73, \
+ 10, 75, 12, 77, 14, 79, 16, 81, 18, 83, \
+ 20, 85, 22, 87, 24, 89, 26, 91, 28, 93, 30, 95, \
+ 32, 97, 34, 99, 36, 101, 38, 103, 40, 105, \
+ 42, 107, 44, 109, 46, 111, 48, 113, 50, 115, \
+ 52, 117, 54, 119, 56, 121, 58, 123, 60, 125, \
+ 62, 127
+#define MASK_32 0, 33, 2, 35, 4, 37, 6, 39, 8, 41, \
+ 10, 43, 12, 45, 14, 47, 16, 49, 18, 51, \
+ 20, 53, 22, 55, 24, 57, 26, 59, 28, 61, 30, 63
+#define MASK_16 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31
+#define MASK_8 0, 9, 2, 11, 4, 13, 6, 15
+
+void __attribute__ ((noipa))
+merge0 (vnx64qi x, vnx64qi y, vnx64qi *out)
+{
+ vnx64qi v = __builtin_shufflevector ((vnx64qi) x, (vnx64qi) y, MASK_64);
+ *(vnx64qi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge1 (vnx64uqi x, vnx64uqi y, vnx64uqi *out)
+{
+ vnx64uqi v = __builtin_shufflevector ((vnx64uqi) x, (vnx64uqi) y, MASK_64);
+ *(vnx64uqi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge2 (vnx32hi x, vnx32hi y, vnx32hi *out)
+{
+ vnx32hi v = __builtin_shufflevector ((vnx32hi) x, (vnx32hi) y, MASK_32);
+ *(vnx32hi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge3 (vnx32uhi x, vnx32uhi y, vnx32uhi *out)
+{
+ vnx32uhi v = __builtin_shufflevector ((vnx32uhi) x, (vnx32uhi) y, MASK_32);
+ *(vnx32uhi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge4 (vnx16si x, vnx16si y, vnx16si *out)
+{
+ vnx16si v = __builtin_shufflevector ((vnx16si) x, (vnx16si) y, MASK_16);
+ *(vnx16si*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge5 (vnx16usi x, vnx16usi y, vnx16usi *out)
+{
+ vnx16usi v = __builtin_shufflevector ((vnx16usi) x, (vnx16usi) y, MASK_16);
+ *(vnx16usi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge6 (vnx8di x, vnx8di y, vnx8di *out)
+{
+ vnx8di v = __builtin_shufflevector ((vnx8di) x, (vnx8di) y, MASK_8);
+ *(vnx8di*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge7 (vnx8udi x, vnx8udi y, vnx8udi *out)
+{
+ vnx8udi v = __builtin_shufflevector ((vnx8udi) x, (vnx8udi) y, MASK_8);
+ *(vnx8udi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge8 (vnx32hf x, vnx32hf y, vnx32hf *out)
+{
+ vnx32hf v = __builtin_shufflevector ((vnx32hf) x, (vnx32hf) y, MASK_32);
+ *(vnx32hf*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge9 (vnx16sf x, vnx16sf y, vnx16sf *out)
+{
+ vnx16sf v = __builtin_shufflevector ((vnx16sf) x, (vnx16sf) y, MASK_16);
+ *(vnx16sf*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge10 (vnx8df x, vnx8df y, vnx8df *out)
+{
+ vnx8df v = __builtin_shufflevector ((vnx8df) x, (vnx8df) y, MASK_8);
+ *(vnx8df*)out = v;
+}
+
+/* { dg-final { scan-assembler-times {\tvmerge.vvm} 11 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-4.c
new file mode 100644
index 00000000000..398d0dcc649
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-4.c
@@ -0,0 +1,122 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfhmin -mabi=lp64d -O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include <stdint-gcc.h>
+
+typedef int8_t vnx128qi __attribute__((vector_size (128)));
+typedef int16_t vnx64hi __attribute__((vector_size (128)));
+typedef int32_t vnx32si __attribute__((vector_size (128)));
+typedef int64_t vnx16di __attribute__((vector_size (128)));
+typedef uint8_t vnx128uqi __attribute__((vector_size (128)));
+typedef uint16_t vnx64uhi __attribute__((vector_size (128)));
+typedef uint32_t vnx32usi __attribute__((vector_size (128)));
+typedef uint64_t vnx16udi __attribute__((vector_size (128)));
+
+typedef _Float16 vnx64hf __attribute__((vector_size (128)));
+typedef float vnx32sf __attribute__((vector_size (128)));
+typedef double vnx16df __attribute__((vector_size (128)));
+
+#define MASK_128 0, 129, 2, 131, 4, 133, 6, 135, 8, 137, \
+ 10, 139, 12, 141, 14, 143, 16, 145, 18, 147, \
+ 20, 149, 22, 151, 24, 153, 26, 155, 28, 157, 30, 159, \
+ 32, 161, 34, 163, 36, 165, 38, 167, 40, 169, \
+ 42, 171, 44, 173, 46, 175, 48, 177, 50, 179, \
+ 52, 181, 54, 183, 56, 185, 58, 187, 60, 189, \
+ 62, 191, \
+ 64, 193, 66, 195, 68, 197, 70, 199, 72, 201, \
+ 74, 203, 76, 205, 78, 207, 80, 209, 82, 211, \
+ 84, 213, 86, 215, 88, 217, 90, 219, 92, 221, 94, 223, \
+ 96, 225, 98, 227, 100, 229, 102, 231, 104, 233, \
+ 106, 235, 108, 237, 110, 239, 112, 241, 114, 243, \
+ 116, 245, 118, 247, 120, 249, 122, 251, 124, 253, \
+ 126, 255
+#define MASK_64 0, 65, 2, 67, 4, 69, 6, 71, 8, 73, \
+ 10, 75, 12, 77, 14, 79, 16, 81, 18, 83, \
+ 20, 85, 22, 87, 24, 89, 26, 91, 28, 93, 30, 95, \
+ 32, 97, 34, 99, 36, 101, 38, 103, 40, 105, \
+ 42, 107, 44, 109, 46, 111, 48, 113, 50, 115, \
+ 52, 117, 54, 119, 56, 121, 58, 123, 60, 125, \
+ 62, 127
+#define MASK_32 0, 33, 2, 35, 4, 37, 6, 39, 8, 41, \
+ 10, 43, 12, 45, 14, 47, 16, 49, 18, 51, \
+ 20, 53, 22, 55, 24, 57, 26, 59, 28, 61, 30, 63
+#define MASK_16 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31
+
+void __attribute__ ((noipa))
+merge0 (vnx128qi x, vnx128qi y, vnx128qi *out)
+{
+ vnx128qi v = __builtin_shufflevector ((vnx128qi) x, (vnx128qi) y, MASK_128);
+ *(vnx128qi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge1 (vnx128uqi x, vnx128uqi y, vnx128uqi *out)
+{
+ vnx128uqi v = __builtin_shufflevector ((vnx128uqi) x, (vnx128uqi) y, MASK_128);
+ *(vnx128uqi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge2 (vnx64hi x, vnx64hi y, vnx64hi *out)
+{
+ vnx64hi v = __builtin_shufflevector ((vnx64hi) x, (vnx64hi) y, MASK_64);
+ *(vnx64hi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge3 (vnx64uhi x, vnx64uhi y, vnx64uhi *out)
+{
+ vnx64uhi v = __builtin_shufflevector ((vnx64uhi) x, (vnx64uhi) y, MASK_64);
+ *(vnx64uhi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge4 (vnx32si x, vnx32si y, vnx32si *out)
+{
+ vnx32si v = __builtin_shufflevector ((vnx32si) x, (vnx32si) y, MASK_32);
+ *(vnx32si*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge5 (vnx32usi x, vnx32usi y, vnx32usi *out)
+{
+ vnx32usi v = __builtin_shufflevector ((vnx32usi) x, (vnx32usi) y, MASK_32);
+ *(vnx32usi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge6 (vnx16di x, vnx16di y, vnx16di *out)
+{
+ vnx16di v = __builtin_shufflevector ((vnx16di) x, (vnx16di) y, MASK_16);
+ *(vnx16di*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge7 (vnx16udi x, vnx16udi y, vnx16udi *out)
+{
+ vnx16udi v = __builtin_shufflevector ((vnx16udi) x, (vnx16udi) y, MASK_16);
+ *(vnx16udi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge8 (vnx64hf x, vnx64hf y, vnx64hf *out)
+{
+ vnx64hf v = __builtin_shufflevector ((vnx64hf) x, (vnx64hf) y, MASK_64);
+ *(vnx64hf*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge9 (vnx32sf x, vnx32sf y, vnx32sf *out)
+{
+ vnx32sf v = __builtin_shufflevector ((vnx32sf) x, (vnx32sf) y, MASK_32);
+ *(vnx32sf*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge10 (vnx16df x, vnx16df y, vnx16df *out)
+{
+ vnx16df v = __builtin_shufflevector ((vnx16df) x, (vnx16df) y, MASK_16);
+ *(vnx16df*)out = v;
+}
+
+/* dg-final scan-assembler-times {\tvmerge.vvm} 11 */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-5.c
new file mode 100644
index 00000000000..4d1b9e29b7d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-5.c
@@ -0,0 +1,76 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfhmin -mabi=lp64d -O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include <stdint-gcc.h>
+
+typedef int8_t vnx8qi __attribute__((vector_size (8)));
+typedef int16_t vnx4hi __attribute__((vector_size (8)));
+typedef int32_t vnx2si __attribute__((vector_size (8)));
+typedef uint8_t vnx8uqi __attribute__((vector_size (8)));
+typedef uint16_t vnx4uhi __attribute__((vector_size (8)));
+typedef uint32_t vnx2usi __attribute__((vector_size (8)));
+
+typedef _Float16 vnx4hf __attribute__((vector_size (8)));
+typedef float vnx2sf __attribute__((vector_size (8)));
+
+#define MASK_8 0, 9, 2, 11, 4, 13, 6, 15
+#define MASK_4 0, 5, 2, 7
+#define MASK_2 0, 3
+
+void __attribute__ ((noipa))
+merge0 (vnx8qi x, vnx8qi y, vnx8qi *out)
+{
+ vnx8qi v = __builtin_shufflevector ((vnx8qi) x, (vnx8qi) y, MASK_8);
+ *(vnx8qi *)out = v;
+}
+
+void __attribute__ ((noipa))
+merge1 (vnx8uqi x, vnx8uqi y, vnx8uqi *out)
+{
+ vnx8uqi v = __builtin_shufflevector ((vnx8uqi) x, (vnx8uqi) y, MASK_8);
+ *(vnx8uqi *)out = v;
+}
+
+void __attribute__ ((noipa))
+merge2 (vnx4hi x, vnx4hi y, vnx4hi *out)
+{
+ vnx4hi v = __builtin_shufflevector ((vnx4hi) x, (vnx4hi) y, MASK_4);
+ *(vnx4hi *)out = v;
+}
+
+void __attribute__ ((noipa))
+merge3 (vnx4uhi x, vnx4uhi y, vnx4uhi *out)
+{
+ vnx4uhi v = __builtin_shufflevector ((vnx4uhi) x, (vnx4uhi) y, MASK_4);
+ *(vnx4uhi *)out = v;
+}
+
+void __attribute__ ((noipa))
+merge4 (vnx2si x, vnx2si y, vnx2si *out)
+{
+ vnx2si v = __builtin_shufflevector ((vnx2si) x, (vnx2si) y, MASK_2);
+ *(vnx2si *)out = v;
+}
+
+void __attribute__ ((noipa))
+merge5 (vnx2usi x, vnx2usi y, vnx2usi *out)
+{
+ vnx2usi v = __builtin_shufflevector ((vnx2usi) x, (vnx2usi) y, MASK_2);
+ *(vnx2usi *)out = v;
+}
+
+void __attribute__ ((noipa))
+merge6 (vnx4hf x, vnx4hf y, vnx4hf *out)
+{
+ vnx4hf v = __builtin_shufflevector ((vnx4hf) x, (vnx4hf) y, MASK_4);
+ *(vnx4hf *)out = v;
+}
+
+void __attribute__ ((noipa))
+merge7 (vnx2sf x, vnx2sf y, vnx2sf *out)
+{
+ vnx2sf v = __builtin_shufflevector ((vnx2sf) x, (vnx2sf) y, MASK_2);
+ *(vnx2sf *)out = v;
+}
+
+/* { dg-final { scan-assembler-times {\tvmerge.vvm} 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-6.c
new file mode 100644
index 00000000000..43acea6c345
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-6.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfhmin -mabi=lp64d -O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include <stdint-gcc.h>
+
+typedef int8_t vnx4qi __attribute__((vector_size (4)));
+typedef int16_t vnx2hi __attribute__((vector_size (4)));
+typedef uint8_t vnx4uqi __attribute__((vector_size (4)));
+typedef uint16_t vnx2uhi __attribute__((vector_size (4)));
+
+typedef _Float16 vnx2hf __attribute__((vector_size (4)));
+
+#define MASK_4 0, 5, 2, 7
+#define MASK_2 0, 3
+
+void __attribute__ ((noipa))
+merge0 (vnx4qi x, vnx4qi y, vnx4qi *out)
+{
+ vnx4qi v = __builtin_shufflevector ((vnx4qi) x, (vnx4qi) y, MASK_4);
+ *(vnx4qi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge1 (vnx4uqi x, vnx4uqi y, vnx4uqi *out)
+{
+ vnx4uqi v = __builtin_shufflevector ((vnx4uqi) x, (vnx4uqi) y, MASK_4);
+ *(vnx4uqi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge2 (vnx2hi x, vnx2hi y, vnx2hi *out)
+{
+ vnx2hi v = __builtin_shufflevector ((vnx2hi) x, (vnx2hi) y, MASK_2);
+ *(vnx2hi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge3 (vnx2uhi x, vnx2uhi y, vnx2uhi *out)
+{
+ vnx2uhi v = __builtin_shufflevector ((vnx2uhi) x, (vnx2uhi) y, MASK_2);
+ *(vnx2uhi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge6 (vnx2hf x, vnx2hf y, vnx2hf *out)
+{
+ vnx2hf v = __builtin_shufflevector ((vnx2hf) x, (vnx2hf) y, MASK_2);
+ *(vnx2hf*)out = v;
+}
+
+/* { dg-final { scan-assembler-times {\tvmerge.vvm} 5 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-7.c
new file mode 100644
index 00000000000..2f38c3d13f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-7.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfhmin -mabi=lp64d -O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include <stdint-gcc.h>
+
+typedef int8_t vnx2qi __attribute__((vector_size (2)));
+typedef uint8_t vnx2uqi __attribute__((vector_size (2)));
+
+#define MASK_2 0, 3
+
+void __attribute__ ((noipa))
+merge0 (vnx2qi x, vnx2qi y, vnx2qi *out)
+{
+ vnx2qi v = __builtin_shufflevector ((vnx2qi) x, (vnx2qi) y, MASK_2);
+ *(vnx2qi *)out = v;
+}
+
+void __attribute__ ((noipa))
+merge1 (vnx2uqi x, vnx2uqi y, vnx2uqi *out)
+{
+ vnx2uqi v = __builtin_shufflevector ((vnx2uqi) x, (vnx2uqi) y, MASK_2);
+ *(vnx2uqi *)out = v;
+}
+
+/* { dg-final { scan-assembler-times {\tvmerge.vvm} 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-1.c
new file mode 100644
index 00000000000..7449f63583c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-1.c
@@ -0,0 +1,119 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-options "-O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "merge-1.c"
+
+int main(void)
+{
+ vnx16qi vnx16qi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+ vnx16qi vnx16qi_y= {201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216};
+ vnx16qi vnx16qi_expect= {1,202,3,204,5,206,7,208,9,210,11,212,13,214,15,216};
+ vnx16qi vnx16qi_real;
+ merge0(vnx16qi_x,vnx16qi_y, &vnx16qi_real);
+ for(int i=0; i<16; i++)
+ if(vnx16qi_real[i]!=vnx16qi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx16uqi vnx16uqi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+ vnx16uqi vnx16uqi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116};
+ vnx16uqi vnx16uqi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116};
+ vnx16uqi vnx16uqi_real;
+ merge1(vnx16uqi_x,vnx16uqi_y, &vnx16uqi_real);
+ for(int i=0; i<16; i++)
+ if(vnx16uqi_real[i]!=vnx16uqi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx8hi vnx8hi_x= {1,2,3,4,5,6,7,8};
+ vnx8hi vnx8hi_y= {101,102,103,104,105,106,107,108};
+ vnx8hi vnx8hi_expect= {1,102,3,104,5,106,7,108};
+ vnx8hi vnx8hi_real;
+ merge2(vnx8hi_x,vnx8hi_y, &vnx8hi_real);
+ for(int i=0; i<8; i++)
+ if(vnx8hi_real[i]!=vnx8hi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx8uhi vnx8uhi_x= {1,2,3,4,5,6,7,8};
+ vnx8uhi vnx8uhi_y= {101,102,103,104,105,106,107,108};
+ vnx8uhi vnx8uhi_expect= {1,102,3,104,5,106,7,108};
+ vnx8uhi vnx8uhi_real;
+ merge3(vnx8uhi_x,vnx8uhi_y, &vnx8uhi_real);
+ for(int i=0; i<8; i++)
+ if(vnx8uhi_real[i]!=vnx8uhi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx4si vnx4si_x= {1,2,3,4};
+ vnx4si vnx4si_y= {101,102,103,104};
+ vnx4si vnx4si_expect= {1,102,3,104};
+ vnx4si vnx4si_real;
+ merge4(vnx4si_x,vnx4si_y,&vnx4si_real);
+ for(int i=0; i<4; i++)
+ if(vnx4si_real[i]!=vnx4si_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx4usi vnx4usi_x= {1,2,3,4};
+ vnx4usi vnx4usi_y= {101,102,103,104};
+ vnx4usi vnx4usi_expect= {1,102,3,104};
+ vnx4usi vnx4usi_real;
+ merge5(vnx4usi_x,vnx4usi_y,&vnx4usi_real);
+ for(int i=0; i<4; i++)
+ if(vnx4usi_real[i]!=vnx4usi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx2di vnx2di_x= {1,2};
+ vnx2di vnx2di_y= {101,102};
+ vnx2di vnx2di_expect= {1,102};
+ vnx2di vnx2di_real;
+ merge6(vnx2di_x,vnx2di_y,&vnx2di_real);
+ for(int i=0; i<2; i++)
+ if(vnx2di_real[i]!=vnx2di_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx2udi vnx2udi_x= {1,2};
+ vnx2udi vnx2udi_y= {101,102};
+ vnx2udi vnx2udi_expect= {1,102};
+ vnx2udi vnx2udi_real;
+ merge7(vnx2udi_x,vnx2udi_y,&vnx2udi_real);
+ for(int i=0; i<2; i++)
+ if(vnx2udi_real[i]!=vnx2udi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx8hf vnx8hf_x= {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0};
+ vnx8hf vnx8hf_y= {1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1};
+ vnx8hf vnx8hf_expect= {1.0,2.1,3.0,4.1,5.0,6.1,7.0,8.1};
+ vnx8hf vnx8hf_real;
+ merge8(vnx8hf_x,vnx8hf_y,&vnx8hf_real);
+ for(int i=0; i<8; i++)
+ if(vnx8hf_real[i]!=vnx8hf_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx4sf vnx4sf_x= {1.0,2.0,3.0,4.0};
+ vnx4sf vnx4sf_y= {1.1,2.1,3.1,4.1};
+ vnx4sf vnx4sf_expect= {1.0,2.1,3.0,4.1};
+ vnx4sf vnx4sf_real;
+ merge9(vnx4sf_x,vnx4sf_y,&vnx4sf_real);
+ for(int i=0; i<4; i++)
+ if(vnx4sf_real[i]!=vnx4sf_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx2df vnx2df_x= {1.0,2.0};
+ vnx2df vnx2df_y= {1.1,2.1};
+ vnx2df vnx2df_expect= {1.0,2.1};
+ vnx2df vnx2df_real;
+ merge10(vnx2df_x,vnx2df_y,&vnx2df_real);
+ for(int i=0; i<2; i++)
+ if(vnx2df_real[i]!=vnx2df_expect[i]) {
+ __builtin_abort();
+ }
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-2.c
new file mode 100644
index 00000000000..248a30433a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-2.c
@@ -0,0 +1,121 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-options "-O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "merge-2.c"
+
+int main(void)
+{
+ vnx32qi vnx32qi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32};
+ vnx32qi vnx32qi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132};
+ vnx32qi vnx32qi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132};
+ vnx32qi vnx32qi_real;
+ merge0(vnx32qi_x,vnx32qi_y,&vnx32qi_real);
+ for(int i=0; i<32; i++)
+ if(vnx32qi_real[i]!=vnx32qi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx32uqi vnx32uqi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32};
+ vnx32uqi vnx32uqi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132};
+ vnx32uqi vnx32uqi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132};
+ vnx32uqi vnx32uqi_real;
+ merge1(vnx32uqi_x,vnx32uqi_y,&vnx32uqi_real);
+ for(int i=0; i<32; i++)
+ if(vnx32uqi_real[i]!=vnx32uqi_expect[i]) {
+ __builtin_abort();
+ }
+
+
+ vnx16hi vnx16hi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+ vnx16hi vnx16hi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116};
+ vnx16hi vnx16hi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116};
+ vnx16hi vnx16hi_real;
+ merge2(vnx16hi_x,vnx16hi_y,&vnx16hi_real);
+ for(int i=0; i<16; i++)
+ if(vnx16hi_real[i]!=vnx16hi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx16uhi vnx16uhi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+ vnx16uhi vnx16uhi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116};
+ vnx16uhi vnx16uhi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116};
+ vnx16uhi vnx16uhi_real;
+ merge3(vnx16uhi_x,vnx16uhi_y,&vnx16uhi_real);
+ for(int i=0; i<16; i++)
+ if(vnx16uhi_real[i]!=vnx16uhi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx8si vnx8si_x= {1,2,3,4,5,6,7,8};
+ vnx8si vnx8si_y= {101,102,103,104,105,106,107,108};
+ vnx8si vnx8si_expect= {1,102,3,104,5,106,7,108};
+ vnx8si vnx8si_real;
+ merge4(vnx8si_x,vnx8si_y,&vnx8si_real);
+ for(int i=0; i<8; i++)
+ if(vnx8si_real[i]!=vnx8si_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx8usi vnx8usi_x= {1,2,3,4,5,6,7,8};
+ vnx8usi vnx8usi_y= {101,102,103,104,105,106,107,108};
+ vnx8usi vnx8usi_expect= {1,102,3,104,5,106,7,108};
+ vnx8usi vnx8usi_real;
+ merge5(vnx8usi_x,vnx8usi_y,&vnx8usi_real);
+ for(int i=0; i<8; i++)
+ if(vnx8usi_real[i]!=vnx8usi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx4di vnx4di_x= {1,2,3,4};
+ vnx4di vnx4di_y= {101,102,103,104};
+ vnx4di vnx4di_expect= {1,102,3,104};
+ vnx4di vnx4di_real;
+ merge6(vnx4di_x,vnx4di_y,&vnx4di_real);
+ for(int i=0; i<4; i++)
+ if(vnx4di_real[i]!=vnx4di_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx4udi vnx4udi_x= {1,2,3,4};
+ vnx4udi vnx4udi_y= {101,102,103,104};
+ vnx4udi vnx4udi_expect= {1,102,3,104};
+ vnx4udi vnx4udi_real;
+ merge7(vnx4udi_x,vnx4udi_y,&vnx4udi_real);
+ for(int i=0; i<4; i++)
+ if(vnx4udi_real[i]!=vnx4udi_expect[i]) {
+ __builtin_abort();
+ }
+
+
+ vnx16hf vnx16hf_x= {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0};
+ vnx16hf vnx16hf_y= {1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1,9.1,10.1,11.1,12.1,13.1,14.1,15.1,16.1};
+ vnx16hf vnx16hf_expect= {1.0,2.1,3.0,4.1,5.0,6.1,7.0,8.1,9.0,10.1,11.0,12.1,13.0,14.1,15.0,16.1};
+ vnx16hf vnx16hf_real;
+ merge8(vnx16hf_x,vnx16hf_y,&vnx16hf_real);
+ for(int i=0; i<16; i++)
+ if(vnx16hf_real[i]!=vnx16hf_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx8sf vnx8sf_x= {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0};
+ vnx8sf vnx8sf_y= {1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1};
+ vnx8sf vnx8sf_expect= {1.0,2.1,3.0,4.1,5.0,6.1,7.0,8.1};
+ vnx8sf vnx8sf_real;
+ merge9(vnx8sf_x,vnx8sf_y,&vnx8sf_real);
+ for(int i=0; i<8; i++)
+ if(vnx8sf_real[i]!=vnx8sf_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx4df vnx4df_x= {1.0,2.0,3.0,4.0};
+ vnx4df vnx4df_y= {1.1,2.1,3.1,4.1};
+ vnx4df vnx4df_expect= {1.0,2.1,3.0,4.1};
+ vnx4df vnx4df_real;
+ merge10(vnx4df_x,vnx4df_y,&vnx4df_real);
+ for(int i=0; i<4; i++)
+ if(vnx4df_real[i]!=vnx4df_expect[i]) {
+ __builtin_abort();
+ }
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-3.c
new file mode 100644
index 00000000000..a587dd45eb1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-3.c
@@ -0,0 +1,150 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-options "-O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "merge-3.c"
+
+int main(void)
+{
+ vnx64qi vnx64qi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,\
+ 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,\
+ 33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,\
+ 49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64
+ };
+ vnx64qi vnx64qi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,\
+ 117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,\
+ 133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,\
+ 149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164
+ };
+ vnx64qi vnx64qi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,\
+ 17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132,\
+ 33,134,35,136,37,138,39,140,41,142,43,144,45,146,47,148,\
+ 49,150,51,152,53,154,55,156,57,158,59,160,61,162,63,164,
+ };
+ vnx64qi vnx64qi_real;
+ merge0(vnx64qi_x,vnx64qi_y,&vnx64qi_real);
+ for(int i=0; i<64; i++)
+ if(vnx64qi_real[i]!=vnx64qi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx64uqi vnx64uqi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,\
+ 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,\
+ 33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,\
+ 49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64
+ };
+ vnx64uqi vnx64uqi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,\
+ 117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,\
+ 133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,\
+ 149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164
+ };
+ vnx64uqi vnx64uqi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,\
+ 17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132,\
+ 33,134,35,136,37,138,39,140,41,142,43,144,45,146,47,148,\
+ 49,150,51,152,53,154,55,156,57,158,59,160,61,162,63,164,
+ };
+ vnx64uqi vnx64uqi_real;
+ merge1(vnx64uqi_x,vnx64uqi_y,&vnx64uqi_real);
+ for(int i=0; i<64; i++)
+ if(vnx64uqi_real[i]!=vnx64uqi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx32hi vnx32hi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32};
+ vnx32hi vnx32hi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132};
+ vnx32hi vnx32hi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132};
+ vnx32hi vnx32hi_real;
+ merge2(vnx32hi_x,vnx32hi_y,&vnx32hi_real);
+ for(int i=0; i<32; i++)
+ if(vnx32hi_real[i]!=vnx32hi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx32uhi vnx32uhi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32};
+ vnx32uhi vnx32uhi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132};
+ vnx32uhi vnx32uhi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132};
+ vnx32uhi vnx32uhi_real;
+ merge3(vnx32uhi_x,vnx32uhi_y,&vnx32uhi_real);
+ for(int i=0; i<32; i++)
+ if(vnx32uhi_real[i]!=vnx32uhi_expect[i]) {
+ __builtin_abort();
+ }
+
+
+ vnx16si vnx16si_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+ vnx16si vnx16si_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116};
+ vnx16si vnx16si_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116};
+ vnx16si vnx16si_real;
+ merge4(vnx16si_x,vnx16si_y,&vnx16si_real);
+ for(int i=0; i<16; i++)
+ if(vnx16si_real[i]!=vnx16si_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx16usi vnx16usi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+ vnx16usi vnx16usi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116};
+ vnx16usi vnx16usi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116};
+ vnx16usi vnx16usi_real;
+ merge5(vnx16usi_x,vnx16usi_y,&vnx16usi_real);
+ for(int i=0; i<16; i++)
+ if(vnx16usi_real[i]!=vnx16usi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx8di vnx8di_x= {1,2,3,4,5,6,7,8};
+ vnx8di vnx8di_y= {101,102,103,104,105,106,107,108};
+ vnx8di vnx8di_expect= {1,102,3,104,5,106,7,108};
+ vnx8di vnx8di_real;
+ merge6(vnx8di_x,vnx8di_y,&vnx8di_real);
+ for(int i=0; i<8; i++)
+ if(vnx8di_real[i]!=vnx8di_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx8udi vnx8udi_x= {1,2,3,4,5,6,7,8};
+ vnx8udi vnx8udi_y= {101,102,103,104,105,106,107,108};
+ vnx8udi vnx8udi_expect= {1,102,3,104,5,106,7,108};
+ vnx8udi vnx8udi_real;
+ merge7(vnx8udi_x,vnx8udi_y,&vnx8udi_real);
+ for(int i=0; i<8; i++)
+ if(vnx8udi_real[i]!=vnx8udi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx32hf vnx32hf_x= {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0,\
+ 17.0,18.0,19.0,20.0,21.0,22.0,23.0,24.0,25.0,26.0,27.0,28.0,29.0,30.0,31.0,32.0
+ };
+ vnx32hf vnx32hf_y= {1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1,9.1,10.1,11.1,12.1,13.1,14.1,15.1,16.1,\
+ 17.1,18.1,19.1,20.1,21.1,22.1,23.1,24.1,25.1,26.1,27.1,28.1,29.1,30.1,31.1,32.1
+ };
+ vnx32hf vnx32hf_expect= {1.0,2.1,3.0,4.1,5.0,6.1,7.0,8.1,9.0,10.1,11.0,12.1,13.0,14.1,15.0,16.1,\
+ 17.0,18.1,19.0,20.1,21.0,22.1,23.0,24.1,25.0,26.1,27.0,28.1,29.0,30.1,31.0,32.1
+ };
+ vnx32hf vnx32hf_real;
+ merge8(vnx32hf_x,vnx32hf_y,&vnx32hf_real);
+ for(int i=0; i<32; i++)
+ if(vnx32hf_real[i]!=vnx32hf_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx16sf vnx16sf_x= {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0};
+ vnx16sf vnx16sf_y= {1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1,9.1,10.1,11.1,12.1,13.1,14.1,15.1,16.1};
+ vnx16sf vnx16sf_expect= {1.0,2.1,3.0,4.1,5.0,6.1,7.0,8.1,9.0,10.1,11.0,12.1,13.0,14.1,15.0,16.1};
+ vnx16sf vnx16sf_real;
+ merge9(vnx16sf_x,vnx16sf_y,&vnx16sf_real);
+ for(int i=0; i<16; i++)
+ if(vnx16sf_real[i]!=vnx16sf_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx8df vnx8df_x= {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0};
+ vnx8df vnx8df_y= {1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1};
+ vnx8df vnx8df_expect= {1.0,2.1,3.0,4.1,5.0,6.1,7.0,8.1};
+ vnx8df vnx8df_real;
+ merge10(vnx8df_x,vnx8df_y,&vnx8df_real);
+ for(int i=0; i<8; i++)
+ if(vnx8df_real[i]!=vnx8df_expect[i]) {
+ __builtin_abort();
+ }
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-4.c
new file mode 100644
index 00000000000..18dedb0f77d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-4.c
@@ -0,0 +1,210 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-options "-O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "merge-4.c"
+
+int main(void)
+{
+ vnx128qi vnx128qi_x= {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,\
+ 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,\
+ 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,\
+ 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,\
+ 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,\
+ 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,\
+ 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,\
+ 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127
+ };
+ vnx128qi vnx128qi_y= {128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,\
+ 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,\
+ 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,\
+ 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,\
+ 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,\
+ 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,\
+ 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,\
+ 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
+ };
+ vnx128qi vnx128qi_expect= {0,129,2,131,4,133,6,135,8,137,10,139,12,141,14,143,\
+ 16,145,18,147,20,149,22,151,24,153,26,155,28,157,30,159,\
+ 32,161,34,163,36,165,38,167,40,169,42,171,44,173,46,175,\
+ 48,177,50,179,52,181,54,183,56,185,58,187,60,189,62,191,\
+ 64,193,66,195,68,197,70,199,72,201,74,203,76,205,78,207,\
+ 80,209,82,211,84,213,86,215,88,217,90,219,92,221,94,223,\
+ 96,225,98,227,100,229,102,231,104,233,106,235,108,237,110,239,\
+ 112,241,114,243,116,245,118,247,120,249,122,251,124,253,126,255
+ };
+ vnx128qi vnx128qi_real;
+ merge0(vnx128qi_x,vnx128qi_y,&vnx128qi_real);
+ for(int i=0; i<128; i++)
+ if(vnx128qi_real[i]!=vnx128qi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx128uqi vnx128uqi_x= {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,\
+ 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,\
+ 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,\
+ 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,\
+ 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,\
+ 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,\
+ 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,\
+ 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127
+ };
+ vnx128uqi vnx128uqi_y= {128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,\
+ 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,\
+ 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,\
+ 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,\
+ 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,\
+ 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,\
+ 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,\
+ 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
+ };
+ vnx128uqi vnx128uqi_expect= {0,129,2,131,4,133,6,135,8,137,10,139,12,141,14,143,\
+ 16,145,18,147,20,149,22,151,24,153,26,155,28,157,30,159,\
+ 32,161,34,163,36,165,38,167,40,169,42,171,44,173,46,175,\
+ 48,177,50,179,52,181,54,183,56,185,58,187,60,189,62,191,\
+ 64,193,66,195,68,197,70,199,72,201,74,203,76,205,78,207,\
+ 80,209,82,211,84,213,86,215,88,217,90,219,92,221,94,223,\
+ 96,225,98,227,100,229,102,231,104,233,106,235,108,237,110,239,\
+ 112,241,114,243,116,245,118,247,120,249,122,251,124,253,126,255
+ };
+ vnx128uqi vnx128uqi_real;
+ merge1(vnx128uqi_x,vnx128uqi_y,&vnx128uqi_real);
+ for(int i=0; i<128; i++)
+ if(vnx128uqi_real[i]!=vnx128uqi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx64hi vnx64hi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,\
+ 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,\
+ 33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,\
+ 49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64
+ };
+ vnx64hi vnx64hi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,\
+ 117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,\
+ 133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,\
+ 149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164
+ };
+ vnx64hi vnx64hi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,\
+ 17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132,\
+ 33,134,35,136,37,138,39,140,41,142,43,144,45,146,47,148,\
+ 49,150,51,152,53,154,55,156,57,158,59,160,61,162,63,164,
+ };
+ vnx64hi vnx64hi_real;
+ merge2(vnx64hi_x,vnx64hi_y,&vnx64hi_real);
+ for(int i=0; i<64; i++)
+ if(vnx64hi_real[i]!=vnx64hi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx64uhi vnx64uhi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,\
+ 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,\
+ 33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,\
+ 49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64
+ };
+ vnx64uhi vnx64uhi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,\
+ 117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,\
+ 133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,\
+ 149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164
+ };
+ vnx64uhi vnx64uhi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,\
+ 17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132,\
+ 33,134,35,136,37,138,39,140,41,142,43,144,45,146,47,148,\
+ 49,150,51,152,53,154,55,156,57,158,59,160,61,162,63,164,
+ };
+ vnx64uhi vnx64uhi_real;
+ merge3(vnx64uhi_x,vnx64uhi_y,&vnx64uhi_real);
+ for(int i=0; i<64; i++)
+ if(vnx64uhi_real[i]!=vnx64uhi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx32si vnx32si_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32};
+ vnx32si vnx32si_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132};
+ vnx32si vnx32si_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132};
+ vnx32si vnx32si_real;
+ merge4(vnx32si_x,vnx32si_y,&vnx32si_real);
+ for(int i=0; i<32; i++)
+ if(vnx32si_real[i]!=vnx32si_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx32usi vnx32usi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32};
+ vnx32usi vnx32usi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132};
+ vnx32usi vnx32usi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132};
+ vnx32usi vnx32usi_real;
+ merge5(vnx32usi_x,vnx32usi_y,&vnx32usi_real);
+ for(int i=0; i<32; i++)
+ if(vnx32usi_real[i]!=vnx32usi_expect[i]) {
+ __builtin_abort();
+ }
+
+
+ vnx16di vnx16di_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+ vnx16di vnx16di_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116};
+ vnx16di vnx16di_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116};
+ vnx16di vnx16di_real;
+ merge6(vnx16di_x,vnx16di_y,&vnx16di_real);
+ for(int i=0; i<16; i++)
+ if(vnx16di_real[i]!=vnx16di_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx16udi vnx16udi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+ vnx16udi vnx16udi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116};
+ vnx16udi vnx16udi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116};
+ vnx16udi vnx16udi_real;
+ merge7(vnx16udi_x,vnx16udi_y,&vnx16udi_real);
+ for(int i=0; i<16; i++)
+ if(vnx16udi_real[i]!=vnx16udi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx64hf vnx64hf_x= {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0,\
+ 17.0,18.0,19.0,20.0,21.0,22.0,23.0,24.0,25.0,26.0,27.0,28.0,29.0,30.0,31.0,32.0,\
+ 33.0,34.0,35.0,36.0,37.0,38.0,39.0,40.0,41.0,42.0,43.0,44.0,45.0,46.0,47.0,48.0,\
+ 49.0,50.0,51.0,52.0,53.0,54.0,55.0,56.0,57.0,58.0,59.0,60.0,61.0,62.0,63.0,64.0
+ };
+ vnx64hf vnx64hf_y= {1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1,9.1,10.1,11.1,12.1,13.1,14.1,15.1,16.1,\
+ 17.1,18.1,19.1,20.1,21.1,22.1,23.1,24.1,25.1,26.1,27.1,28.1,29.1,30.1,31.1,32.1,\
+ 33.1,34.1,35.1,36.1,37.1,38.1,39.1,40.1,41.1,42.1,43.1,44.1,45.1,46.1,47.1,48.1,\
+ 49.1,50.1,51.1,52.1,53.1,54.1,55.1,56.1,57.1,58.1,59.1,60.1,61.1,62.1,63.1,64.1
+ };
+ vnx64hf vnx64hf_expect= {1.0,2.1,3.0,4.1,5.0,6.1,7.0,8.1,9.0,10.1,11.0,12.1,13.0,14.1,15.0,16.1,\
+ 17.0,18.1,19.0,20.1,21.0,22.1,23.0,24.1,25.0,26.1,27.0,28.1,29.0,30.1,31.0,32.1,\
+ 33.0,34.1,35.0,36.1,37.0,38.1,39.0,40.1,41.0,42.1,43.0,44.1,45.0,46.1,47.0,48.1,\
+ 49.0,50.1,51.0,52.1,53.0,54.1,55.0,56.1,57.0,58.1,59.0,60.1,61.0,62.1,63.0,64.1
+ };
+ vnx64hf vnx64hf_real;
+ merge8(vnx64hf_x,vnx64hf_y,&vnx64hf_real);
+ for(int i=0; i<64; i++)
+ if(vnx64hf_real[i]!=vnx64hf_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx32sf vnx32sf_x= {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0,\
+ 17.0,18.0,19.0,20.0,21.0,22.0,23.0,24.0,25.0,26.0,27.0,28.0,29.0,30.0,31.0,32.0
+ };
+ vnx32sf vnx32sf_y= {1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1,9.1,10.1,11.1,12.1,13.1,14.1,15.1,16.1,\
+ 17.1,18.1,19.1,20.1,21.1,22.1,23.1,24.1,25.1,26.1,27.1,28.1,29.1,30.1,31.1,32.1
+ };
+ vnx32sf vnx32sf_expect= {1.0,2.1,3.0,4.1,5.0,6.1,7.0,8.1,9.0,10.1,11.0,12.1,13.0,14.1,15.0,16.1,\
+ 17.0,18.1,19.0,20.1,21.0,22.1,23.0,24.1,25.0,26.1,27.0,28.1,29.0,30.1,31.0,32.1
+ };
+ vnx32sf vnx32sf_real;
+ merge9(vnx32sf_x,vnx32sf_y,&vnx32sf_real);
+ for(int i=0; i<32; i++)
+ if(vnx32sf_real[i]!=vnx32sf_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx16df vnx16df_x= {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0};
+ vnx16df vnx16df_y= {1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1,9.1,10.1,11.1,12.1,13.1,14.1,15.1,16.1};
+ vnx16df vnx16df_expect= {1.0,2.1,3.0,4.1,5.0,6.1,7.0,8.1,9.0,10.1,11.0,12.1,13.0,14.1,15.0,16.1};
+ vnx16df vnx16df_real;
+ merge10(vnx16df_x,vnx16df_y,&vnx16df_real);
+ for(int i=0; i<16; i++)
+ if(vnx16df_real[i]!=vnx16df_expect[i]) {
+ __builtin_abort();
+ }
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-5.c
new file mode 100644
index 00000000000..61dbd5b4f2b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-5.c
@@ -0,0 +1,89 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-options "-O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "merge-5.c"
+
+int main(void)
+{
+ vnx8qi vnx8qi_x= {1,2,3,4,5,6,7,8};
+ vnx8qi vnx8qi_y= {101,102,103,104,105,106,107,108};
+ vnx8qi vnx8qi_expect= {1,102,3,104,5,106,7,108};
+ vnx8qi vnx8qi_real;
+ merge0(vnx8qi_x,vnx8qi_y,&vnx8qi_real);
+ for(int i=0; i<8; i++)
+ if(vnx8qi_real[i]!=vnx8qi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx8uqi vnx8uqi_x= {1,2,3,4,5,6,7,8};
+ vnx8uqi vnx8uqi_y= {101,102,103,104,105,106,107,108};
+ vnx8uqi vnx8uqi_expect= {1,102,3,104,5,106,7,108};
+ vnx8uqi vnx8uqi_real;
+ merge1(vnx8uqi_x,vnx8uqi_y,&vnx8uqi_real);
+ for(int i=0; i<8; i++)
+ if(vnx8uqi_real[i]!=vnx8uqi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx4hi vnx4hi_x= {1,2,3,4};
+ vnx4hi vnx4hi_y= {101,102,103,104};
+ vnx4hi vnx4hi_expect= {1,102,3,104};
+ vnx4hi vnx4hi_real;
+ merge2(vnx4hi_x,vnx4hi_y,&vnx4hi_real);
+ for(int i=0; i<4; i++)
+ if(vnx4hi_real[i]!=vnx4hi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx4uhi vnx4uhi_x= {1,2,3,4};
+ vnx4uhi vnx4uhi_y= {101,102,103,104};
+ vnx4uhi vnx4uhi_expect= {1,102,3,104};
+ vnx4uhi vnx4uhi_real;
+ merge3(vnx4uhi_x,vnx4uhi_y,&vnx4uhi_real);
+ for(int i=0; i<4; i++)
+ if(vnx4uhi_real[i]!=vnx4uhi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx2si vnx2si_x= {1,2};
+ vnx2si vnx2si_y= {101,102};
+ vnx2si vnx2si_expect= {1,102};
+ vnx2si vnx2si_real;
+ merge4(vnx2si_x,vnx2si_y,&vnx2si_real);
+ for(int i=0; i<2; i++)
+ if(vnx2si_real[i]!=vnx2si_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx2usi vnx2usi_x= {1,2};
+ vnx2usi vnx2usi_y= {101,102};
+ vnx2usi vnx2usi_expect= {1,102};
+ vnx2usi vnx2usi_real;
+ merge5(vnx2usi_x,vnx2usi_y,&vnx2usi_real);
+ for(int i=0; i<2; i++)
+ if(vnx2usi_real[i]!=vnx2usi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx4hf vnx4hf_x= {1.0,2.0,3.0,4.0};
+ vnx4hf vnx4hf_y= {1.1,2.1,3.1,4.1};
+ vnx4hf vnx4hf_expect= {1.0,2.1,3.0,4.1};
+ vnx4hf vnx4hf_real;
+ merge6(vnx4hf_x,vnx4hf_y,&vnx4hf_real);
+ for(int i=0; i<4; i++)
+ if(vnx4hf_real[i]!=vnx4hf_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx2sf vnx2sf_x= {1.0,2.0};
+ vnx2sf vnx2sf_y= {1.1,2.1};
+ vnx2sf vnx2sf_expect= {1.0,2.1};
+ vnx2sf vnx2sf_real;
+ merge7(vnx2sf_x,vnx2sf_y,&vnx2sf_real);
+ for(int i=0; i<2; i++)
+ if(vnx2sf_real[i]!=vnx2sf_expect[i]) {
+ __builtin_abort();
+ }
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-6.c
new file mode 100644
index 00000000000..da7c462e0c3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-6.c
@@ -0,0 +1,59 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-options "-O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "merge-6.c"
+
+int main(void)
+{
+ vnx4qi vnx4qi_x= {1,2,3,4};
+ vnx4qi vnx4qi_y= {101,102,103,104};
+ vnx4qi vnx4qi_expect= {1,102,3,104};
+ vnx4qi vnx4qi_real;
+ merge0(vnx4qi_x,vnx4qi_y,&vnx4qi_real);
+ for(int i=0; i<4; i++)
+ if(vnx4qi_real[i]!=vnx4qi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx4uqi vnx4uqi_x= {1,2,3,4};
+ vnx4uqi vnx4uqi_y= {101,102,103,104};
+ vnx4uqi vnx4uqi_expect= {1,102,3,104};
+ vnx4uqi vnx4uqi_real;
+ merge1(vnx4uqi_x,vnx4uqi_y,&vnx4uqi_real);
+ for(int i=0; i<4; i++)
+ if(vnx4uqi_real[i]!=vnx4uqi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx2hi vnx2hi_x= {1,2};
+ vnx2hi vnx2hi_y= {101,102};
+ vnx2hi vnx2hi_expect= {1,102};
+ vnx2hi vnx2hi_real;
+ merge2(vnx2hi_x,vnx2hi_y,&vnx2hi_real);
+ for(int i=0; i<2; i++)
+ if(vnx2hi_real[i]!=vnx2hi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx2uhi vnx2uhi_x= {1,2};
+ vnx2uhi vnx2uhi_y= {101,102};
+ vnx2uhi vnx2uhi_expect= {1,102};
+ vnx2uhi vnx2uhi_real;
+ merge3(vnx2uhi_x,vnx2uhi_y,&vnx2uhi_real);
+ for(int i=0; i<2; i++)
+ if(vnx2uhi_real[i]!=vnx2uhi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx2hf vnx2hf_x= {1.0,2.0};
+ vnx2hf vnx2hf_y= {1.1,2.1};
+ vnx2hf vnx2hf_expect= {1.0,2.1};
+ vnx2hf vnx2hf_real;
+ merge6(vnx2hf_x,vnx2hf_y,&vnx2hf_real);
+ for(int i=0; i<2; i++)
+ if(vnx2hf_real[i]!=vnx2hf_expect[i]) {
+ __builtin_abort();
+ }
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-7.c
new file mode 100644
index 00000000000..7aaa6b37d52
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-7.c
@@ -0,0 +1,29 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-options "-O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "merge-7.c"
+
+int main(void)
+{
+ vnx2qi vnx2qi_x= {1,2};
+ vnx2qi vnx2qi_y= {101,102};
+ vnx2qi vnx2qi_expect= {1,102};
+ vnx2qi vnx2qi_real;
+ merge0(vnx2qi_x,vnx2qi_y,&vnx2qi_real);
+ for(int i=0; i<2; i++)
+ if(vnx2qi_real[i]!=vnx2qi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx2uqi vnx2uqi_x= {1,2};
+ vnx2uqi vnx2uqi_y= {101,102};
+ vnx2uqi vnx2uqi_expect= {1,102};
+ vnx2uqi vnx2uqi_real;
+ merge1(vnx2uqi_x,vnx2uqi_y,&vnx2uqi_real);
+ for(int i=0; i<2; i++)
+ if(vnx2uqi_real[i]!=vnx2uqi_expect[i]) {
+ __builtin_abort();
+ }
+
+ return 0;
+}
--
2.34.1
@@ -2390,6 +2390,56 @@ struct expand_vec_perm_d
bool testing_p;
};
+/* Recognize the patterns that we can use merge operation to shuffle the
+ vectors. The value of Each element (index i) in selector can only be
+ either i or nunits + i. We will check the pattern is actually monotonic.
+
+ E.g.
+ v = VEC_PERM_EXPR (v0, v1, selector),
+ selector = { 0, nunits + 1, 2, nunits + 3, 4, nunits + 5, ... }
+
+ We can transform such pattern into:
+
+ v = vcond_mask (v0, v1, mask),
+ mask = { 0, 1, 0, 1, 0, 1, ... }. */
+
+static bool
+shuffle_merge_patterns (struct expand_vec_perm_d *d)
+{
+ machine_mode vmode = d->vmode;
+ machine_mode sel_mode = related_int_vector_mode (vmode).require ();
+ int n_patterns = d->perm.encoding ().npatterns ();
+ poly_int64 vec_len = d->perm.length ();
+
+ for (int i = 0; i < n_patterns; ++i)
+ if (!known_eq (d->perm[i], i) && !known_eq (d->perm[i], vec_len + i))
+ return false;
+
+ for (int i = n_patterns; i < n_patterns * 2; i++)
+ if (!d->perm.series_p (i, n_patterns, i, n_patterns)
+ && !d->perm.series_p (i, n_patterns, vec_len + i, n_patterns))
+ return false;
+
+ if (d->testing_p)
+ return true;
+
+ machine_mode mask_mode = get_mask_mode (vmode).require ();
+ rtx mask = gen_reg_rtx (mask_mode);
+
+ rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm);
+
+ /* MASK = SELECTOR < NUNITS ? 1 : 0. */
+ rtx x = gen_int_mode (vec_len, GET_MODE_INNER (sel_mode));
+ insn_code icode = code_for_pred_cmp_scalar (sel_mode);
+ rtx cmp = gen_rtx_fmt_ee (LTU, mask_mode, sel, x);
+ rtx ops[] = {mask, cmp, sel, x};
+ emit_vlmax_cmp_insn (icode, ops);
+
+ /* TARGET = MASK ? OP0 : OP1. */
+ emit_insn (gen_vcond_mask (vmode, vmode, d->target, d->op0, d->op1, mask));
+ return true;
+}
+
/* Recognize decompress patterns:
1. VEC_PERM_EXPR op0 and op1
@@ -2511,6 +2561,8 @@ expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
if (d->vmode == d->op_mode)
{
+ if (shuffle_merge_patterns (d))
+ return true;
if (shuffle_decompress_patterns (d))
return true;
if (shuffle_generic_patterns (d))
new file mode 100644
@@ -0,0 +1,101 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfhmin -mabi=lp64d -O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include <stdint-gcc.h>
+
+typedef int8_t vnx16qi __attribute__((vector_size (16)));
+typedef int16_t vnx8hi __attribute__((vector_size (16)));
+typedef int32_t vnx4si __attribute__((vector_size (16)));
+typedef int64_t vnx2di __attribute__((vector_size (16)));
+typedef uint8_t vnx16uqi __attribute__((vector_size (16)));
+typedef uint16_t vnx8uhi __attribute__((vector_size (16)));
+typedef uint32_t vnx4usi __attribute__((vector_size (16)));
+typedef uint64_t vnx2udi __attribute__((vector_size (16)));
+
+typedef _Float16 vnx8hf __attribute__((vector_size (16)));
+typedef float vnx4sf __attribute__((vector_size (16)));
+typedef double vnx2df __attribute__((vector_size (16)));
+
+#define MASK_16 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31
+#define MASK_8 0, 9, 2, 11, 4, 13, 6, 15
+#define MASK_4 0, 5, 2, 7
+#define MASK_2 0, 3
+
+void __attribute__ ((noipa))
+merge0 (vnx16qi x, vnx16qi y, vnx16qi *out)
+{
+ vnx16qi v = __builtin_shufflevector ((vnx16qi) x, (vnx16qi) y, MASK_16);
+ *(vnx16qi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge1 (vnx16uqi x, vnx16uqi y, vnx16uqi *out)
+{
+ vnx16uqi v = __builtin_shufflevector ((vnx16uqi) x, (vnx16uqi) y, MASK_16);
+ *(vnx16uqi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge2 (vnx8hi x, vnx8hi y, vnx8hi *out)
+{
+ vnx8hi v = __builtin_shufflevector ((vnx8hi) x, (vnx8hi) y, MASK_8);
+ *(vnx8hi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge3 (vnx8uhi x, vnx8uhi y, vnx8uhi *out)
+{
+ vnx8uhi v = __builtin_shufflevector ((vnx8uhi) x, (vnx8uhi) y, MASK_8);
+ *(vnx8uhi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge4 (vnx4si x, vnx4si y, vnx4si *out)
+{
+ vnx4si v = __builtin_shufflevector ((vnx4si) x, (vnx4si) y, MASK_4);
+ *(vnx4si*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge5 (vnx4usi x, vnx4usi y, vnx4usi *out)
+{
+ vnx4usi v = __builtin_shufflevector ((vnx4usi) x, (vnx4usi) y, MASK_4);
+ *(vnx4usi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge6 (vnx2di x, vnx2di y, vnx2di *out)
+{
+ vnx2di v = __builtin_shufflevector ((vnx2di) x, (vnx2di) y, MASK_2);
+ *(vnx2di*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge7 (vnx2udi x, vnx2udi y, vnx2udi *out)
+{
+ vnx2udi v = __builtin_shufflevector ((vnx2udi) x, (vnx2udi) y, MASK_2);
+ *(vnx2udi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge8 (vnx8hf x, vnx8hf y, vnx8hf *out)
+{
+ vnx8hf v = __builtin_shufflevector ((vnx8hf) x, (vnx8hf) y, MASK_8);
+ *(vnx8hf*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge9 (vnx4sf x, vnx4sf y, vnx4sf *out)
+{
+ vnx4sf v = __builtin_shufflevector ((vnx4sf) x, (vnx4sf) y, MASK_4);
+ *(vnx4sf*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge10 (vnx2df x, vnx2df y, vnx2df *out)
+{
+ vnx2df v = __builtin_shufflevector ((vnx2df) x, (vnx2df) y, MASK_2);
+ *(vnx2df*)out = v;
+}
+
+/* { dg-final { scan-assembler-times {\tvmerge.vvm} 11 } } */
new file mode 100644
@@ -0,0 +1,103 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfhmin -mabi=lp64d -O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include <stdint-gcc.h>
+
+typedef int8_t vnx32qi __attribute__((vector_size (32)));
+typedef int16_t vnx16hi __attribute__((vector_size (32)));
+typedef int32_t vnx8si __attribute__((vector_size (32)));
+typedef int64_t vnx4di __attribute__((vector_size (32)));
+typedef uint8_t vnx32uqi __attribute__((vector_size (32)));
+typedef uint16_t vnx16uhi __attribute__((vector_size (32)));
+typedef uint32_t vnx8usi __attribute__((vector_size (32)));
+typedef uint64_t vnx4udi __attribute__((vector_size (32)));
+
+typedef _Float16 vnx16hf __attribute__((vector_size (32)));
+typedef float vnx8sf __attribute__((vector_size (32)));
+typedef double vnx4df __attribute__((vector_size (32)));
+
+#define MASK_32 0, 33, 2, 35, 4, 37, 6, 39, 8, 41, \
+ 10, 43, 12, 45, 14, 47, 16, 49, 18, 51, \
+ 20, 53, 22, 55, 24, 57, 26, 59, 28, 61, 30, 63
+#define MASK_16 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31
+#define MASK_8 0, 9, 2, 11, 4, 13, 6, 15
+#define MASK_4 0, 5, 2, 7
+
+void __attribute__ ((noipa))
+merge0 (vnx32qi x, vnx32qi y, vnx32qi *out)
+{
+ vnx32qi v = __builtin_shufflevector ((vnx32qi) x, (vnx32qi) y, MASK_32);
+ *(vnx32qi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge1 (vnx32uqi x, vnx32uqi y, vnx32uqi *out)
+{
+ vnx32uqi v = __builtin_shufflevector ((vnx32uqi) x, (vnx32uqi) y, MASK_32);
+ *(vnx32uqi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge2 (vnx16hi x, vnx16hi y, vnx16hi *out)
+{
+ vnx16hi v = __builtin_shufflevector ((vnx16hi) x, (vnx16hi) y, MASK_16);
+ *(vnx16hi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge3 (vnx16uhi x, vnx16uhi y, vnx16uhi *out)
+{
+ vnx16uhi v = __builtin_shufflevector ((vnx16uhi) x, (vnx16uhi) y, MASK_16);
+ *(vnx16uhi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge4 (vnx8si x, vnx8si y, vnx8si *out)
+{
+ vnx8si v = __builtin_shufflevector ((vnx8si) x, (vnx8si) y, MASK_8);
+ *(vnx8si*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge5 (vnx8usi x, vnx8usi y, vnx8usi *out)
+{
+ vnx8usi v = __builtin_shufflevector ((vnx8usi) x, (vnx8usi) y, MASK_8);
+ *(vnx8usi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge6 (vnx4di x, vnx4di y, vnx4di *out)
+{
+ vnx4di v = __builtin_shufflevector ((vnx4di) x, (vnx4di) y, MASK_4);
+ *(vnx4di*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge7 (vnx4udi x, vnx4udi y, vnx4udi *out)
+{
+ vnx4udi v = __builtin_shufflevector ((vnx4udi) x, (vnx4udi) y, MASK_4);
+ *(vnx4udi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge8 (vnx16hf x, vnx16hf y, vnx16hf *out)
+{
+ vnx16hf v = __builtin_shufflevector ((vnx16hf) x, (vnx16hf) y, MASK_16);
+ *(vnx16hf*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge9 (vnx8sf x, vnx8sf y, vnx8sf *out)
+{
+ vnx8sf v = __builtin_shufflevector ((vnx8sf) x, (vnx8sf) y, MASK_8);
+ *(vnx8sf*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge10 (vnx4df x, vnx4df y, vnx4df *out)
+{
+ vnx4df v = __builtin_shufflevector ((vnx4df) x, (vnx4df) y, MASK_4);
+ *(vnx4df*)out = v;
+}
+
+/* { dg-final { scan-assembler-times {\tvmerge.vvm} 11 } } */
new file mode 100644
@@ -0,0 +1,109 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfhmin -mabi=lp64d -O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include <stdint-gcc.h>
+
+typedef int8_t vnx64qi __attribute__((vector_size (64)));
+typedef int16_t vnx32hi __attribute__((vector_size (64)));
+typedef int32_t vnx16si __attribute__((vector_size (64)));
+typedef int64_t vnx8di __attribute__((vector_size (64)));
+typedef uint8_t vnx64uqi __attribute__((vector_size (64)));
+typedef uint16_t vnx32uhi __attribute__((vector_size (64)));
+typedef uint32_t vnx16usi __attribute__((vector_size (64)));
+typedef uint64_t vnx8udi __attribute__((vector_size (64)));
+
+typedef _Float16 vnx32hf __attribute__((vector_size (64)));
+typedef float vnx16sf __attribute__((vector_size (64)));
+typedef double vnx8df __attribute__((vector_size (64)));
+
+#define MASK_64 0, 65, 2, 67, 4, 69, 6, 71, 8, 73, \
+ 10, 75, 12, 77, 14, 79, 16, 81, 18, 83, \
+ 20, 85, 22, 87, 24, 89, 26, 91, 28, 93, 30, 95, \
+ 32, 97, 34, 99, 36, 101, 38, 103, 40, 105, \
+ 42, 107, 44, 109, 46, 111, 48, 113, 50, 115, \
+ 52, 117, 54, 119, 56, 121, 58, 123, 60, 125, \
+ 62, 127
+#define MASK_32 0, 33, 2, 35, 4, 37, 6, 39, 8, 41, \
+ 10, 43, 12, 45, 14, 47, 16, 49, 18, 51, \
+ 20, 53, 22, 55, 24, 57, 26, 59, 28, 61, 30, 63
+#define MASK_16 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31
+#define MASK_8 0, 9, 2, 11, 4, 13, 6, 15
+
+void __attribute__ ((noipa))
+merge0 (vnx64qi x, vnx64qi y, vnx64qi *out)
+{
+ vnx64qi v = __builtin_shufflevector ((vnx64qi) x, (vnx64qi) y, MASK_64);
+ *(vnx64qi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge1 (vnx64uqi x, vnx64uqi y, vnx64uqi *out)
+{
+ vnx64uqi v = __builtin_shufflevector ((vnx64uqi) x, (vnx64uqi) y, MASK_64);
+ *(vnx64uqi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge2 (vnx32hi x, vnx32hi y, vnx32hi *out)
+{
+ vnx32hi v = __builtin_shufflevector ((vnx32hi) x, (vnx32hi) y, MASK_32);
+ *(vnx32hi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge3 (vnx32uhi x, vnx32uhi y, vnx32uhi *out)
+{
+ vnx32uhi v = __builtin_shufflevector ((vnx32uhi) x, (vnx32uhi) y, MASK_32);
+ *(vnx32uhi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge4 (vnx16si x, vnx16si y, vnx16si *out)
+{
+ vnx16si v = __builtin_shufflevector ((vnx16si) x, (vnx16si) y, MASK_16);
+ *(vnx16si*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge5 (vnx16usi x, vnx16usi y, vnx16usi *out)
+{
+ vnx16usi v = __builtin_shufflevector ((vnx16usi) x, (vnx16usi) y, MASK_16);
+ *(vnx16usi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge6 (vnx8di x, vnx8di y, vnx8di *out)
+{
+ vnx8di v = __builtin_shufflevector ((vnx8di) x, (vnx8di) y, MASK_8);
+ *(vnx8di*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge7 (vnx8udi x, vnx8udi y, vnx8udi *out)
+{
+ vnx8udi v = __builtin_shufflevector ((vnx8udi) x, (vnx8udi) y, MASK_8);
+ *(vnx8udi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge8 (vnx32hf x, vnx32hf y, vnx32hf *out)
+{
+ vnx32hf v = __builtin_shufflevector ((vnx32hf) x, (vnx32hf) y, MASK_32);
+ *(vnx32hf*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge9 (vnx16sf x, vnx16sf y, vnx16sf *out)
+{
+ vnx16sf v = __builtin_shufflevector ((vnx16sf) x, (vnx16sf) y, MASK_16);
+ *(vnx16sf*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge10 (vnx8df x, vnx8df y, vnx8df *out)
+{
+ vnx8df v = __builtin_shufflevector ((vnx8df) x, (vnx8df) y, MASK_8);
+ *(vnx8df*)out = v;
+}
+
+/* { dg-final { scan-assembler-times {\tvmerge.vvm} 11 } } */
new file mode 100644
@@ -0,0 +1,122 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfhmin -mabi=lp64d -O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include <stdint-gcc.h>
+
+typedef int8_t vnx128qi __attribute__((vector_size (128)));
+typedef int16_t vnx64hi __attribute__((vector_size (128)));
+typedef int32_t vnx32si __attribute__((vector_size (128)));
+typedef int64_t vnx16di __attribute__((vector_size (128)));
+typedef uint8_t vnx128uqi __attribute__((vector_size (128)));
+typedef uint16_t vnx64uhi __attribute__((vector_size (128)));
+typedef uint32_t vnx32usi __attribute__((vector_size (128)));
+typedef uint64_t vnx16udi __attribute__((vector_size (128)));
+
+typedef _Float16 vnx64hf __attribute__((vector_size (128)));
+typedef float vnx32sf __attribute__((vector_size (128)));
+typedef double vnx16df __attribute__((vector_size (128)));
+
+#define MASK_128 0, 129, 2, 131, 4, 133, 6, 135, 8, 137, \
+ 10, 139, 12, 141, 14, 143, 16, 145, 18, 147, \
+ 20, 149, 22, 151, 24, 153, 26, 155, 28, 157, 30, 159, \
+ 32, 161, 34, 163, 36, 165, 38, 167, 40, 169, \
+ 42, 171, 44, 173, 46, 175, 48, 177, 50, 179, \
+ 52, 181, 54, 183, 56, 185, 58, 187, 60, 189, \
+ 62, 191, \
+ 64, 193, 66, 195, 68, 197, 70, 199, 72, 201, \
+ 74, 203, 76, 205, 78, 207, 80, 209, 82, 211, \
+ 84, 213, 86, 215, 88, 217, 90, 219, 92, 221, 94, 223, \
+ 96, 225, 98, 227, 100, 229, 102, 231, 104, 233, \
+ 106, 235, 108, 237, 110, 239, 112, 241, 114, 243, \
+ 116, 245, 118, 247, 120, 249, 122, 251, 124, 253, \
+ 126, 255
+#define MASK_64 0, 65, 2, 67, 4, 69, 6, 71, 8, 73, \
+ 10, 75, 12, 77, 14, 79, 16, 81, 18, 83, \
+ 20, 85, 22, 87, 24, 89, 26, 91, 28, 93, 30, 95, \
+ 32, 97, 34, 99, 36, 101, 38, 103, 40, 105, \
+ 42, 107, 44, 109, 46, 111, 48, 113, 50, 115, \
+ 52, 117, 54, 119, 56, 121, 58, 123, 60, 125, \
+ 62, 127
+#define MASK_32 0, 33, 2, 35, 4, 37, 6, 39, 8, 41, \
+ 10, 43, 12, 45, 14, 47, 16, 49, 18, 51, \
+ 20, 53, 22, 55, 24, 57, 26, 59, 28, 61, 30, 63
+#define MASK_16 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31
+
+void __attribute__ ((noipa))
+merge0 (vnx128qi x, vnx128qi y, vnx128qi *out)
+{
+ vnx128qi v = __builtin_shufflevector ((vnx128qi) x, (vnx128qi) y, MASK_128);
+ *(vnx128qi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge1 (vnx128uqi x, vnx128uqi y, vnx128uqi *out)
+{
+ vnx128uqi v = __builtin_shufflevector ((vnx128uqi) x, (vnx128uqi) y, MASK_128);
+ *(vnx128uqi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge2 (vnx64hi x, vnx64hi y, vnx64hi *out)
+{
+ vnx64hi v = __builtin_shufflevector ((vnx64hi) x, (vnx64hi) y, MASK_64);
+ *(vnx64hi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge3 (vnx64uhi x, vnx64uhi y, vnx64uhi *out)
+{
+ vnx64uhi v = __builtin_shufflevector ((vnx64uhi) x, (vnx64uhi) y, MASK_64);
+ *(vnx64uhi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge4 (vnx32si x, vnx32si y, vnx32si *out)
+{
+ vnx32si v = __builtin_shufflevector ((vnx32si) x, (vnx32si) y, MASK_32);
+ *(vnx32si*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge5 (vnx32usi x, vnx32usi y, vnx32usi *out)
+{
+ vnx32usi v = __builtin_shufflevector ((vnx32usi) x, (vnx32usi) y, MASK_32);
+ *(vnx32usi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge6 (vnx16di x, vnx16di y, vnx16di *out)
+{
+ vnx16di v = __builtin_shufflevector ((vnx16di) x, (vnx16di) y, MASK_16);
+ *(vnx16di*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge7 (vnx16udi x, vnx16udi y, vnx16udi *out)
+{
+ vnx16udi v = __builtin_shufflevector ((vnx16udi) x, (vnx16udi) y, MASK_16);
+ *(vnx16udi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge8 (vnx64hf x, vnx64hf y, vnx64hf *out)
+{
+ vnx64hf v = __builtin_shufflevector ((vnx64hf) x, (vnx64hf) y, MASK_64);
+ *(vnx64hf*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge9 (vnx32sf x, vnx32sf y, vnx32sf *out)
+{
+ vnx32sf v = __builtin_shufflevector ((vnx32sf) x, (vnx32sf) y, MASK_32);
+ *(vnx32sf*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge10 (vnx16df x, vnx16df y, vnx16df *out)
+{
+ vnx16df v = __builtin_shufflevector ((vnx16df) x, (vnx16df) y, MASK_16);
+ *(vnx16df*)out = v;
+}
+
+/* { dg-final { scan-assembler-times {\tvmerge.vvm} 11 } } */
new file mode 100644
@@ -0,0 +1,76 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfhmin -mabi=lp64d -O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include <stdint-gcc.h>
+
+typedef int8_t vnx8qi __attribute__((vector_size (8)));
+typedef int16_t vnx4hi __attribute__((vector_size (8)));
+typedef int32_t vnx2si __attribute__((vector_size (8)));
+typedef uint8_t vnx8uqi __attribute__((vector_size (8)));
+typedef uint16_t vnx4uhi __attribute__((vector_size (8)));
+typedef uint32_t vnx2usi __attribute__((vector_size (8)));
+
+typedef _Float16 vnx4hf __attribute__((vector_size (8)));
+typedef float vnx2sf __attribute__((vector_size (8)));
+
+#define MASK_8 0, 9, 2, 11, 4, 13, 6, 15
+#define MASK_4 0, 5, 2, 7
+#define MASK_2 0, 3
+
+void __attribute__ ((noipa))
+merge0 (vnx8qi x, vnx8qi y, vnx8qi *out)
+{
+ vnx8qi v = __builtin_shufflevector ((vnx8qi) x, (vnx8qi) y, MASK_8);
+ *(vnx8qi *)out = v;
+}
+
+void __attribute__ ((noipa))
+merge1 (vnx8uqi x, vnx8uqi y, vnx8uqi *out)
+{
+ vnx8uqi v = __builtin_shufflevector ((vnx8uqi) x, (vnx8uqi) y, MASK_8);
+ *(vnx8uqi *)out = v;
+}
+
+void __attribute__ ((noipa))
+merge2 (vnx4hi x, vnx4hi y, vnx4hi *out)
+{
+ vnx4hi v = __builtin_shufflevector ((vnx4hi) x, (vnx4hi) y, MASK_4);
+ *(vnx4hi *)out = v;
+}
+
+void __attribute__ ((noipa))
+merge3 (vnx4uhi x, vnx4uhi y, vnx4uhi *out)
+{
+ vnx4uhi v = __builtin_shufflevector ((vnx4uhi) x, (vnx4uhi) y, MASK_4);
+ *(vnx4uhi *)out = v;
+}
+
+void __attribute__ ((noipa))
+merge4 (vnx2si x, vnx2si y, vnx2si *out)
+{
+ vnx2si v = __builtin_shufflevector ((vnx2si) x, (vnx2si) y, MASK_2);
+ *(vnx2si *)out = v;
+}
+
+void __attribute__ ((noipa))
+merge5 (vnx2usi x, vnx2usi y, vnx2usi *out)
+{
+ vnx2usi v = __builtin_shufflevector ((vnx2usi) x, (vnx2usi) y, MASK_2);
+ *(vnx2usi *)out = v;
+}
+
+void __attribute__ ((noipa))
+merge6 (vnx4hf x, vnx4hf y, vnx4hf *out)
+{
+ vnx4hf v = __builtin_shufflevector ((vnx4hf) x, (vnx4hf) y, MASK_4);
+ *(vnx4hf *)out = v;
+}
+
+void __attribute__ ((noipa))
+merge7 (vnx2sf x, vnx2sf y, vnx2sf *out)
+{
+ vnx2sf v = __builtin_shufflevector ((vnx2sf) x, (vnx2sf) y, MASK_2);
+ *(vnx2sf *)out = v;
+}
+
+/* { dg-final { scan-assembler-times {\tvmerge.vvm} 8 } } */
new file mode 100644
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfhmin -mabi=lp64d -O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include <stdint-gcc.h>
+
+typedef int8_t vnx4qi __attribute__((vector_size (4)));
+typedef int16_t vnx2hi __attribute__((vector_size (4)));
+typedef uint8_t vnx4uqi __attribute__((vector_size (4)));
+typedef uint16_t vnx2uhi __attribute__((vector_size (4)));
+
+typedef _Float16 vnx2hf __attribute__((vector_size (4)));
+
+#define MASK_4 0, 5, 2, 7
+#define MASK_2 0, 3
+
+void __attribute__ ((noipa))
+merge0 (vnx4qi x, vnx4qi y, vnx4qi *out)
+{
+ vnx4qi v = __builtin_shufflevector ((vnx4qi) x, (vnx4qi) y, MASK_4);
+ *(vnx4qi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge1 (vnx4uqi x, vnx4uqi y, vnx4uqi *out)
+{
+ vnx4uqi v = __builtin_shufflevector ((vnx4uqi) x, (vnx4uqi) y, MASK_4);
+ *(vnx4uqi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge2 (vnx2hi x, vnx2hi y, vnx2hi *out)
+{
+ vnx2hi v = __builtin_shufflevector ((vnx2hi) x, (vnx2hi) y, MASK_2);
+ *(vnx2hi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge3 (vnx2uhi x, vnx2uhi y, vnx2uhi *out)
+{
+ vnx2uhi v = __builtin_shufflevector ((vnx2uhi) x, (vnx2uhi) y, MASK_2);
+ *(vnx2uhi*)out = v;
+}
+
+void __attribute__ ((noipa))
+merge6 (vnx2hf x, vnx2hf y, vnx2hf *out)
+{
+ vnx2hf v = __builtin_shufflevector ((vnx2hf) x, (vnx2hf) y, MASK_2);
+ *(vnx2hf*)out = v;
+}
+
+/* { dg-final { scan-assembler-times {\tvmerge.vvm} 5 } } */
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfhmin -mabi=lp64d -O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include <stdint-gcc.h>
+
+typedef int8_t vnx2qi __attribute__((vector_size (2)));
+typedef uint8_t vnx2uqi __attribute__((vector_size (2)));
+
+#define MASK_2 0, 3
+
+void __attribute__ ((noipa))
+merge0 (vnx2qi x, vnx2qi y, vnx2qi *out)
+{
+ vnx2qi v = __builtin_shufflevector ((vnx2qi) x, (vnx2qi) y, MASK_2);
+ *(vnx2qi *)out = v;
+}
+
+void __attribute__ ((noipa))
+merge1 (vnx2uqi x, vnx2uqi y, vnx2uqi *out)
+{
+ vnx2uqi v = __builtin_shufflevector ((vnx2uqi) x, (vnx2uqi) y, MASK_2);
+ *(vnx2uqi *)out = v;
+}
+
+/* { dg-final { scan-assembler-times {\tvmerge.vvm} 2 } } */
new file mode 100644
@@ -0,0 +1,119 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-options "-O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "merge-1.c"
+
+int main(void)
+{
+ vnx16qi vnx16qi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+ vnx16qi vnx16qi_y= {201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216};
+ vnx16qi vnx16qi_expect= {1,202,3,204,5,206,7,208,9,210,11,212,13,214,15,216};
+ vnx16qi vnx16qi_real;
+ merge0(vnx16qi_x,vnx16qi_y, &vnx16qi_real);
+ for(int i=0; i<16; i++)
+ if(vnx16qi_real[i]!=vnx16qi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx16uqi vnx16uqi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+ vnx16uqi vnx16uqi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116};
+ vnx16uqi vnx16uqi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116};
+ vnx16uqi vnx16uqi_real;
+ merge1(vnx16uqi_x,vnx16uqi_y, &vnx16uqi_real);
+ for(int i=0; i<16; i++)
+ if(vnx16uqi_real[i]!=vnx16uqi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx8hi vnx8hi_x= {1,2,3,4,5,6,7,8};
+ vnx8hi vnx8hi_y= {101,102,103,104,105,106,107,108};
+ vnx8hi vnx8hi_expect= {1,102,3,104,5,106,7,108};
+ vnx8hi vnx8hi_real;
+ merge2(vnx8hi_x,vnx8hi_y, &vnx8hi_real);
+ for(int i=0; i<8; i++)
+ if(vnx8hi_real[i]!=vnx8hi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx8uhi vnx8uhi_x= {1,2,3,4,5,6,7,8};
+ vnx8uhi vnx8uhi_y= {101,102,103,104,105,106,107,108};
+ vnx8uhi vnx8uhi_expect= {1,102,3,104,5,106,7,108};
+ vnx8uhi vnx8uhi_real;
+ merge3(vnx8uhi_x,vnx8uhi_y, &vnx8uhi_real);
+ for(int i=0; i<8; i++)
+ if(vnx8uhi_real[i]!=vnx8uhi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx4si vnx4si_x= {1,2,3,4};
+ vnx4si vnx4si_y= {101,102,103,104};
+ vnx4si vnx4si_expect= {1,102,3,104};
+ vnx4si vnx4si_real;
+ merge4(vnx4si_x,vnx4si_y,&vnx4si_real);
+ for(int i=0; i<4; i++)
+ if(vnx4si_real[i]!=vnx4si_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx4usi vnx4usi_x= {1,2,3,4};
+ vnx4usi vnx4usi_y= {101,102,103,104};
+ vnx4usi vnx4usi_expect= {1,102,3,104};
+ vnx4usi vnx4usi_real;
+ merge5(vnx4usi_x,vnx4usi_y,&vnx4usi_real);
+ for(int i=0; i<4; i++)
+ if(vnx4usi_real[i]!=vnx4usi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx2di vnx2di_x= {1,2};
+ vnx2di vnx2di_y= {101,102};
+ vnx2di vnx2di_expect= {1,102};
+ vnx2di vnx2di_real;
+ merge6(vnx2di_x,vnx2di_y,&vnx2di_real);
+ for(int i=0; i<2; i++)
+ if(vnx2di_real[i]!=vnx2di_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx2udi vnx2udi_x= {1,2};
+ vnx2udi vnx2udi_y= {101,102};
+ vnx2udi vnx2udi_expect= {1,102};
+ vnx2udi vnx2udi_real;
+ merge7(vnx2udi_x,vnx2udi_y,&vnx2udi_real);
+ for(int i=0; i<2; i++)
+ if(vnx2udi_real[i]!=vnx2udi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx8hf vnx8hf_x= {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0};
+ vnx8hf vnx8hf_y= {1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1};
+ vnx8hf vnx8hf_expect= {1.0,2.1,3.0,4.1,5.0,6.1,7.0,8.1};
+ vnx8hf vnx8hf_real;
+ merge8(vnx8hf_x,vnx8hf_y,&vnx8hf_real);
+ for(int i=0; i<8; i++)
+ if(vnx8hf_real[i]!=vnx8hf_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx4sf vnx4sf_x= {1.0,2.0,3.0,4.0};
+ vnx4sf vnx4sf_y= {1.1,2.1,3.1,4.1};
+ vnx4sf vnx4sf_expect= {1.0,2.1,3.0,4.1};
+ vnx4sf vnx4sf_real;
+ merge9(vnx4sf_x,vnx4sf_y,&vnx4sf_real);
+ for(int i=0; i<4; i++)
+ if(vnx4sf_real[i]!=vnx4sf_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx2df vnx2df_x= {1.0,2.0};
+ vnx2df vnx2df_y= {1.1,2.1};
+ vnx2df vnx2df_expect= {1.0,2.1};
+ vnx2df vnx2df_real;
+ merge10(vnx2df_x,vnx2df_y,&vnx2df_real);
+ for(int i=0; i<2; i++)
+ if(vnx2df_real[i]!=vnx2df_expect[i]) {
+ __builtin_abort();
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,121 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-options "-O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "merge-2.c"
+
+int main(void)
+{
+ vnx32qi vnx32qi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32};
+ vnx32qi vnx32qi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132};
+ vnx32qi vnx32qi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132};
+ vnx32qi vnx32qi_real;
+ merge0(vnx32qi_x,vnx32qi_y,&vnx32qi_real);
+ for(int i=0; i<32; i++)
+ if(vnx32qi_real[i]!=vnx32qi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx32uqi vnx32uqi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32};
+ vnx32uqi vnx32uqi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132};
+ vnx32uqi vnx32uqi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132};
+ vnx32uqi vnx32uqi_real;
+ merge1(vnx32uqi_x,vnx32uqi_y,&vnx32uqi_real);
+ for(int i=0; i<32; i++)
+ if(vnx32uqi_real[i]!=vnx32uqi_expect[i]) {
+ __builtin_abort();
+ }
+
+
+ vnx16hi vnx16hi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+ vnx16hi vnx16hi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116};
+ vnx16hi vnx16hi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116};
+ vnx16hi vnx16hi_real;
+ merge2(vnx16hi_x,vnx16hi_y,&vnx16hi_real);
+ for(int i=0; i<16; i++)
+ if(vnx16hi_real[i]!=vnx16hi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx16uhi vnx16uhi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+ vnx16uhi vnx16uhi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116};
+ vnx16uhi vnx16uhi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116};
+ vnx16uhi vnx16uhi_real;
+ merge3(vnx16uhi_x,vnx16uhi_y,&vnx16uhi_real);
+ for(int i=0; i<16; i++)
+ if(vnx16uhi_real[i]!=vnx16uhi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx8si vnx8si_x= {1,2,3,4,5,6,7,8};
+ vnx8si vnx8si_y= {101,102,103,104,105,106,107,108};
+ vnx8si vnx8si_expect= {1,102,3,104,5,106,7,108};
+ vnx8si vnx8si_real;
+ merge4(vnx8si_x,vnx8si_y,&vnx8si_real);
+ for(int i=0; i<8; i++)
+ if(vnx8si_real[i]!=vnx8si_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx8usi vnx8usi_x= {1,2,3,4,5,6,7,8};
+ vnx8usi vnx8usi_y= {101,102,103,104,105,106,107,108};
+ vnx8usi vnx8usi_expect= {1,102,3,104,5,106,7,108};
+ vnx8usi vnx8usi_real;
+ merge5(vnx8usi_x,vnx8usi_y,&vnx8usi_real);
+ for(int i=0; i<8; i++)
+ if(vnx8usi_real[i]!=vnx8usi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx4di vnx4di_x= {1,2,3,4};
+ vnx4di vnx4di_y= {101,102,103,104};
+ vnx4di vnx4di_expect= {1,102,3,104};
+ vnx4di vnx4di_real;
+ merge6(vnx4di_x,vnx4di_y,&vnx4di_real);
+ for(int i=0; i<4; i++)
+ if(vnx4di_real[i]!=vnx4di_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx4udi vnx4udi_x= {1,2,3,4};
+ vnx4udi vnx4udi_y= {101,102,103,104};
+ vnx4udi vnx4udi_expect= {1,102,3,104};
+ vnx4udi vnx4udi_real;
+ merge7(vnx4udi_x,vnx4udi_y,&vnx4udi_real);
+ for(int i=0; i<4; i++)
+ if(vnx4udi_real[i]!=vnx4udi_expect[i]) {
+ __builtin_abort();
+ }
+
+
+ vnx16hf vnx16hf_x= {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0};
+ vnx16hf vnx16hf_y= {1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1,9.1,10.1,11.1,12.1,13.1,14.1,15.1,16.1};
+ vnx16hf vnx16hf_expect= {1.0,2.1,3.0,4.1,5.0,6.1,7.0,8.1,9.0,10.1,11.0,12.1,13.0,14.1,15.0,16.1};
+ vnx16hf vnx16hf_real;
+ merge8(vnx16hf_x,vnx16hf_y,&vnx16hf_real);
+  for(int i=0; i<16; i++)
+ if(vnx16hf_real[i]!=vnx16hf_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx8sf vnx8sf_x= {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0};
+ vnx8sf vnx8sf_y= {1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1};
+ vnx8sf vnx8sf_expect= {1.0,2.1,3.0,4.1,5.0,6.1,7.0,8.1};
+ vnx8sf vnx8sf_real;
+ merge9(vnx8sf_x,vnx8sf_y,&vnx8sf_real);
+  for(int i=0; i<8; i++)
+ if(vnx8sf_real[i]!=vnx8sf_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx4df vnx4df_x= {1.0,2.0,3.0,4.0};
+ vnx4df vnx4df_y= {1.1,2.1,3.1,4.1};
+ vnx4df vnx4df_expect= {1.0,2.1,3.0,4.1};
+ vnx4df vnx4df_real;
+ merge10(vnx4df_x,vnx4df_y,&vnx4df_real);
+  for(int i=0; i<4; i++)
+ if(vnx4df_real[i]!=vnx4df_expect[i]) {
+ __builtin_abort();
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,150 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-options "-O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "merge-3.c"
+
+int main(void)
+{
+ vnx64qi vnx64qi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,\
+ 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,\
+ 33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,\
+ 49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64
+ };
+ vnx64qi vnx64qi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,\
+ 117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,\
+ 133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,\
+ 149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164
+ };
+ vnx64qi vnx64qi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,\
+ 17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132,\
+ 33,134,35,136,37,138,39,140,41,142,43,144,45,146,47,148,\
+ 49,150,51,152,53,154,55,156,57,158,59,160,61,162,63,164,
+ };
+ vnx64qi vnx64qi_real;
+ merge0(vnx64qi_x,vnx64qi_y,&vnx64qi_real);
+ for(int i=0; i<64; i++)
+ if(vnx64qi_real[i]!=vnx64qi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx64uqi vnx64uqi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,\
+ 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,\
+ 33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,\
+ 49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64
+ };
+ vnx64uqi vnx64uqi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,\
+ 117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,\
+ 133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,\
+ 149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164
+ };
+ vnx64uqi vnx64uqi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,\
+ 17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132,\
+ 33,134,35,136,37,138,39,140,41,142,43,144,45,146,47,148,\
+ 49,150,51,152,53,154,55,156,57,158,59,160,61,162,63,164,
+ };
+ vnx64uqi vnx64uqi_real;
+ merge1(vnx64uqi_x,vnx64uqi_y,&vnx64uqi_real);
+ for(int i=0; i<64; i++)
+ if(vnx64uqi_real[i]!=vnx64uqi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx32hi vnx32hi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32};
+ vnx32hi vnx32hi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132};
+ vnx32hi vnx32hi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132};
+ vnx32hi vnx32hi_real;
+ merge2(vnx32hi_x,vnx32hi_y,&vnx32hi_real);
+ for(int i=0; i<32; i++)
+ if(vnx32hi_real[i]!=vnx32hi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx32uhi vnx32uhi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32};
+ vnx32uhi vnx32uhi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132};
+ vnx32uhi vnx32uhi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132};
+ vnx32uhi vnx32uhi_real;
+ merge3(vnx32uhi_x,vnx32uhi_y,&vnx32uhi_real);
+ for(int i=0; i<32; i++)
+ if(vnx32uhi_real[i]!=vnx32uhi_expect[i]) {
+ __builtin_abort();
+ }
+
+
+ vnx16si vnx16si_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+ vnx16si vnx16si_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116};
+ vnx16si vnx16si_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116};
+ vnx16si vnx16si_real;
+ merge4(vnx16si_x,vnx16si_y,&vnx16si_real);
+ for(int i=0; i<16; i++)
+ if(vnx16si_real[i]!=vnx16si_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx16usi vnx16usi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+ vnx16usi vnx16usi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116};
+ vnx16usi vnx16usi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116};
+ vnx16usi vnx16usi_real;
+ merge5(vnx16usi_x,vnx16usi_y,&vnx16usi_real);
+ for(int i=0; i<16; i++)
+ if(vnx16usi_real[i]!=vnx16usi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx8di vnx8di_x= {1,2,3,4,5,6,7,8};
+ vnx8di vnx8di_y= {101,102,103,104,105,106,107,108};
+ vnx8di vnx8di_expect= {1,102,3,104,5,106,7,108};
+ vnx8di vnx8di_real;
+ merge6(vnx8di_x,vnx8di_y,&vnx8di_real);
+ for(int i=0; i<8; i++)
+ if(vnx8di_real[i]!=vnx8di_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx8udi vnx8udi_x= {1,2,3,4,5,6,7,8};
+ vnx8udi vnx8udi_y= {101,102,103,104,105,106,107,108};
+ vnx8udi vnx8udi_expect= {1,102,3,104,5,106,7,108};
+ vnx8udi vnx8udi_real;
+ merge7(vnx8udi_x,vnx8udi_y,&vnx8udi_real);
+ for(int i=0; i<8; i++)
+ if(vnx8udi_real[i]!=vnx8udi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx32hf vnx32hf_x= {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0,\
+ 17.0,18.0,19.0,20.0,21.0,22.0,23.0,24.0,25.0,26.0,27.0,28.0,29.0,30.0,31.0,32.0
+ };
+ vnx32hf vnx32hf_y= {1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1,9.1,10.1,11.1,12.1,13.1,14.1,15.1,16.1,\
+ 17.1,18.1,19.1,20.1,21.1,22.1,23.1,24.1,25.1,26.1,27.1,28.1,29.1,30.1,31.1,32.1
+ };
+ vnx32hf vnx32hf_expect= {1.0,2.1,3.0,4.1,5.0,6.1,7.0,8.1,9.0,10.1,11.0,12.1,13.0,14.1,15.0,16.1,\
+ 17.0,18.1,19.0,20.1,21.0,22.1,23.0,24.1,25.0,26.1,27.0,28.1,29.0,30.1,31.0,32.1
+ };
+ vnx32hf vnx32hf_real;
+ merge8(vnx32hf_x,vnx32hf_y,&vnx32hf_real);
+ for(int i=0; i<32; i++)
+ if(vnx32hf_real[i]!=vnx32hf_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx16sf vnx16sf_x= {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0};
+ vnx16sf vnx16sf_y= {1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1,9.1,10.1,11.1,12.1,13.1,14.1,15.1,16.1};
+ vnx16sf vnx16sf_expect= {1.0,2.1,3.0,4.1,5.0,6.1,7.0,8.1,9.0,10.1,11.0,12.1,13.0,14.1,15.0,16.1};
+ vnx16sf vnx16sf_real;
+ merge9(vnx16sf_x,vnx16sf_y,&vnx16sf_real);
+ for(int i=0; i<16; i++)
+ if(vnx16sf_real[i]!=vnx16sf_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx8df vnx8df_x= {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0};
+ vnx8df vnx8df_y= {1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1};
+ vnx8df vnx8df_expect= {1.0,2.1,3.0,4.1,5.0,6.1,7.0,8.1};
+ vnx8df vnx8df_real;
+ merge10(vnx8df_x,vnx8df_y,&vnx8df_real);
+ for(int i=0; i<8; i++)
+ if(vnx8df_real[i]!=vnx8df_expect[i]) {
+ __builtin_abort();
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,210 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-options "-O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "merge-4.c"
+
+int main(void)
+{
+ vnx128qi vnx128qi_x= {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,\
+ 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,\
+ 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,\
+ 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,\
+ 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,\
+ 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,\
+ 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,\
+ 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127
+ };
+ vnx128qi vnx128qi_y= {128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,\
+ 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,\
+ 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,\
+ 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,\
+ 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,\
+ 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,\
+ 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,\
+ 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
+ };
+ vnx128qi vnx128qi_expect= {0,129,2,131,4,133,6,135,8,137,10,139,12,141,14,143,\
+ 16,145,18,147,20,149,22,151,24,153,26,155,28,157,30,159,\
+ 32,161,34,163,36,165,38,167,40,169,42,171,44,173,46,175,\
+ 48,177,50,179,52,181,54,183,56,185,58,187,60,189,62,191,\
+ 64,193,66,195,68,197,70,199,72,201,74,203,76,205,78,207,\
+ 80,209,82,211,84,213,86,215,88,217,90,219,92,221,94,223,\
+ 96,225,98,227,100,229,102,231,104,233,106,235,108,237,110,239,\
+ 112,241,114,243,116,245,118,247,120,249,122,251,124,253,126,255
+ };
+ vnx128qi vnx128qi_real;
+ merge0(vnx128qi_x,vnx128qi_y,&vnx128qi_real);
+ for(int i=0; i<128; i++)
+ if(vnx128qi_real[i]!=vnx128qi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx128uqi vnx128uqi_x= {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,\
+ 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,\
+ 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,\
+ 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,\
+ 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,\
+ 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,\
+ 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,\
+ 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127
+ };
+ vnx128uqi vnx128uqi_y= {128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,\
+ 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,\
+ 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,\
+ 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,\
+ 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,\
+ 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,\
+ 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,\
+ 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
+ };
+ vnx128uqi vnx128uqi_expect= {0,129,2,131,4,133,6,135,8,137,10,139,12,141,14,143,\
+ 16,145,18,147,20,149,22,151,24,153,26,155,28,157,30,159,\
+ 32,161,34,163,36,165,38,167,40,169,42,171,44,173,46,175,\
+ 48,177,50,179,52,181,54,183,56,185,58,187,60,189,62,191,\
+ 64,193,66,195,68,197,70,199,72,201,74,203,76,205,78,207,\
+ 80,209,82,211,84,213,86,215,88,217,90,219,92,221,94,223,\
+ 96,225,98,227,100,229,102,231,104,233,106,235,108,237,110,239,\
+ 112,241,114,243,116,245,118,247,120,249,122,251,124,253,126,255
+ };
+ vnx128uqi vnx128uqi_real;
+ merge1(vnx128uqi_x,vnx128uqi_y,&vnx128uqi_real);
+ for(int i=0; i<128; i++)
+ if(vnx128uqi_real[i]!=vnx128uqi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx64hi vnx64hi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,\
+ 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,\
+ 33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,\
+ 49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64
+ };
+ vnx64hi vnx64hi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,\
+ 117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,\
+ 133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,\
+ 149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164
+ };
+ vnx64hi vnx64hi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,\
+ 17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132,\
+ 33,134,35,136,37,138,39,140,41,142,43,144,45,146,47,148,\
+ 49,150,51,152,53,154,55,156,57,158,59,160,61,162,63,164
+ };
+ vnx64hi vnx64hi_real;
+ merge2(vnx64hi_x,vnx64hi_y,&vnx64hi_real);
+ for(int i=0; i<64; i++)
+ if(vnx64hi_real[i]!=vnx64hi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx64uhi vnx64uhi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,\
+ 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,\
+ 33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,\
+ 49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64
+ };
+ vnx64uhi vnx64uhi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,\
+ 117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,\
+ 133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,\
+ 149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164
+ };
+ vnx64uhi vnx64uhi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,\
+ 17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132,\
+ 33,134,35,136,37,138,39,140,41,142,43,144,45,146,47,148,\
+ 49,150,51,152,53,154,55,156,57,158,59,160,61,162,63,164
+ };
+ vnx64uhi vnx64uhi_real;
+ merge3(vnx64uhi_x,vnx64uhi_y,&vnx64uhi_real);
+ for(int i=0; i<64; i++)
+ if(vnx64uhi_real[i]!=vnx64uhi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx32si vnx32si_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32};
+ vnx32si vnx32si_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132};
+ vnx32si vnx32si_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132};
+ vnx32si vnx32si_real;
+ merge4(vnx32si_x,vnx32si_y,&vnx32si_real);
+ for(int i=0; i<32; i++)
+ if(vnx32si_real[i]!=vnx32si_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx32usi vnx32usi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32};
+ vnx32usi vnx32usi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132};
+ vnx32usi vnx32usi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116,17,118,19,120,21,122,23,124,25,126,27,128,29,130,31,132};
+ vnx32usi vnx32usi_real;
+ merge5(vnx32usi_x,vnx32usi_y,&vnx32usi_real);
+ for(int i=0; i<32; i++)
+ if(vnx32usi_real[i]!=vnx32usi_expect[i]) {
+ __builtin_abort();
+ }
+
+
+ vnx16di vnx16di_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+ vnx16di vnx16di_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116};
+ vnx16di vnx16di_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116};
+ vnx16di vnx16di_real;
+ merge6(vnx16di_x,vnx16di_y,&vnx16di_real);
+ for(int i=0; i<16; i++)
+ if(vnx16di_real[i]!=vnx16di_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx16udi vnx16udi_x= {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+ vnx16udi vnx16udi_y= {101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116};
+ vnx16udi vnx16udi_expect= {1,102,3,104,5,106,7,108,9,110,11,112,13,114,15,116};
+ vnx16udi vnx16udi_real;
+ merge7(vnx16udi_x,vnx16udi_y,&vnx16udi_real);
+ for(int i=0; i<16; i++)
+ if(vnx16udi_real[i]!=vnx16udi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx64hf vnx64hf_x= {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0,\
+ 17.0,18.0,19.0,20.0,21.0,22.0,23.0,24.0,25.0,26.0,27.0,28.0,29.0,30.0,31.0,32.0,\
+ 33.0,34.0,35.0,36.0,37.0,38.0,39.0,40.0,41.0,42.0,43.0,44.0,45.0,46.0,47.0,48.0,\
+ 49.0,50.0,51.0,52.0,53.0,54.0,55.0,56.0,57.0,58.0,59.0,60.0,61.0,62.0,63.0,64.0
+ };
+ vnx64hf vnx64hf_y= {1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1,9.1,10.1,11.1,12.1,13.1,14.1,15.1,16.1,\
+ 17.1,18.1,19.1,20.1,21.1,22.1,23.1,24.1,25.1,26.1,27.1,28.1,29.1,30.1,31.1,32.1,\
+ 33.1,34.1,35.1,36.1,37.1,38.1,39.1,40.1,41.1,42.1,43.1,44.1,45.1,46.1,47.1,48.1,\
+ 49.1,50.1,51.1,52.1,53.1,54.1,55.1,56.1,57.1,58.1,59.1,60.1,61.1,62.1,63.1,64.1
+ };
+ vnx64hf vnx64hf_expect= {1.0,2.1,3.0,4.1,5.0,6.1,7.0,8.1,9.0,10.1,11.0,12.1,13.0,14.1,15.0,16.1,\
+ 17.0,18.1,19.0,20.1,21.0,22.1,23.0,24.1,25.0,26.1,27.0,28.1,29.0,30.1,31.0,32.1,\
+ 33.0,34.1,35.0,36.1,37.0,38.1,39.0,40.1,41.0,42.1,43.0,44.1,45.0,46.1,47.0,48.1,\
+ 49.0,50.1,51.0,52.1,53.0,54.1,55.0,56.1,57.0,58.1,59.0,60.1,61.0,62.1,63.0,64.1
+ };
+ vnx64hf vnx64hf_real;
+ merge8(vnx64hf_x,vnx64hf_y,&vnx64hf_real);
+ for(int i=0; i<64; i++)
+ if(vnx64hf_real[i]!=vnx64hf_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx32sf vnx32sf_x= {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0,\
+ 17.0,18.0,19.0,20.0,21.0,22.0,23.0,24.0,25.0,26.0,27.0,28.0,29.0,30.0,31.0,32.0
+ };
+ vnx32sf vnx32sf_y= {1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1,9.1,10.1,11.1,12.1,13.1,14.1,15.1,16.1,\
+ 17.1,18.1,19.1,20.1,21.1,22.1,23.1,24.1,25.1,26.1,27.1,28.1,29.1,30.1,31.1,32.1
+ };
+ vnx32sf vnx32sf_expect= {1.0,2.1,3.0,4.1,5.0,6.1,7.0,8.1,9.0,10.1,11.0,12.1,13.0,14.1,15.0,16.1,\
+ 17.0,18.1,19.0,20.1,21.0,22.1,23.0,24.1,25.0,26.1,27.0,28.1,29.0,30.1,31.0,32.1
+ };
+ vnx32sf vnx32sf_real;
+ merge9(vnx32sf_x,vnx32sf_y,&vnx32sf_real);
+ for(int i=0; i<32; i++)
+ if(vnx32sf_real[i]!=vnx32sf_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx16df vnx16df_x= {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0};
+ vnx16df vnx16df_y= {1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1,9.1,10.1,11.1,12.1,13.1,14.1,15.1,16.1};
+ vnx16df vnx16df_expect= {1.0,2.1,3.0,4.1,5.0,6.1,7.0,8.1,9.0,10.1,11.0,12.1,13.0,14.1,15.0,16.1};
+ vnx16df vnx16df_real;
+ merge10(vnx16df_x,vnx16df_y,&vnx16df_real);
+ for(int i=0; i<16; i++)
+ if(vnx16df_real[i]!=vnx16df_expect[i]) {
+ __builtin_abort();
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,89 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-options "-O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "merge-5.c"
+
+int main(void)
+{
+ vnx8qi vnx8qi_x= {1,2,3,4,5,6,7,8};
+ vnx8qi vnx8qi_y= {101,102,103,104,105,106,107,108};
+ vnx8qi vnx8qi_expect= {1,102,3,104,5,106,7,108};
+ vnx8qi vnx8qi_real;
+ merge0(vnx8qi_x,vnx8qi_y,&vnx8qi_real);
+ for(int i=0; i<8; i++)
+ if(vnx8qi_real[i]!=vnx8qi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx8uqi vnx8uqi_x= {1,2,3,4,5,6,7,8};
+ vnx8uqi vnx8uqi_y= {101,102,103,104,105,106,107,108};
+ vnx8uqi vnx8uqi_expect= {1,102,3,104,5,106,7,108};
+ vnx8uqi vnx8uqi_real;
+ merge1(vnx8uqi_x,vnx8uqi_y,&vnx8uqi_real);
+ for(int i=0; i<8; i++)
+ if(vnx8uqi_real[i]!=vnx8uqi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx4hi vnx4hi_x= {1,2,3,4};
+ vnx4hi vnx4hi_y= {101,102,103,104};
+ vnx4hi vnx4hi_expect= {1,102,3,104};
+ vnx4hi vnx4hi_real;
+ merge2(vnx4hi_x,vnx4hi_y,&vnx4hi_real);
+ for(int i=0; i<4; i++)
+ if(vnx4hi_real[i]!=vnx4hi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx4uhi vnx4uhi_x= {1,2,3,4};
+ vnx4uhi vnx4uhi_y= {101,102,103,104};
+ vnx4uhi vnx4uhi_expect= {1,102,3,104};
+ vnx4uhi vnx4uhi_real;
+ merge3(vnx4uhi_x,vnx4uhi_y,&vnx4uhi_real);
+ for(int i=0; i<4; i++)
+ if(vnx4uhi_real[i]!=vnx4uhi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx2si vnx2si_x= {1,2};
+ vnx2si vnx2si_y= {101,102};
+ vnx2si vnx2si_expect= {1,102};
+ vnx2si vnx2si_real;
+ merge4(vnx2si_x,vnx2si_y,&vnx2si_real);
+ for(int i=0; i<2; i++)
+ if(vnx2si_real[i]!=vnx2si_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx2usi vnx2usi_x= {1,2};
+ vnx2usi vnx2usi_y= {101,102};
+ vnx2usi vnx2usi_expect= {1,102};
+ vnx2usi vnx2usi_real;
+ merge5(vnx2usi_x,vnx2usi_y,&vnx2usi_real);
+ for(int i=0; i<2; i++)
+ if(vnx2usi_real[i]!=vnx2usi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx4hf vnx4hf_x= {1.0,2.0,3.0,4.0};
+ vnx4hf vnx4hf_y= {1.1,2.1,3.1,4.1};
+ vnx4hf vnx4hf_expect= {1.0,2.1,3.0,4.1};
+ vnx4hf vnx4hf_real;
+ merge6(vnx4hf_x,vnx4hf_y,&vnx4hf_real);
+ for(int i=0; i<4; i++)
+ if(vnx4hf_real[i]!=vnx4hf_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx2sf vnx2sf_x= {1.0,2.0};
+ vnx2sf vnx2sf_y= {1.1,2.1};
+ vnx2sf vnx2sf_expect= {1.0,2.1};
+ vnx2sf vnx2sf_real;
+ merge7(vnx2sf_x,vnx2sf_y,&vnx2sf_real);
+ for(int i=0; i<2; i++)
+ if(vnx2sf_real[i]!=vnx2sf_expect[i]) {
+ __builtin_abort();
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,59 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-options "-O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "merge-6.c"
+
+int main(void)
+{
+ vnx4qi vnx4qi_x= {1,2,3,4};
+ vnx4qi vnx4qi_y= {101,102,103,104};
+ vnx4qi vnx4qi_expect= {1,102,3,104};
+ vnx4qi vnx4qi_real;
+ merge0(vnx4qi_x,vnx4qi_y,&vnx4qi_real);
+ for(int i=0; i<4; i++)
+ if(vnx4qi_real[i]!=vnx4qi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx4uqi vnx4uqi_x= {1,2,3,4};
+ vnx4uqi vnx4uqi_y= {101,102,103,104};
+ vnx4uqi vnx4uqi_expect= {1,102,3,104};
+ vnx4uqi vnx4uqi_real;
+ merge1(vnx4uqi_x,vnx4uqi_y,&vnx4uqi_real);
+ for(int i=0; i<4; i++)
+ if(vnx4uqi_real[i]!=vnx4uqi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx2hi vnx2hi_x= {1,2};
+ vnx2hi vnx2hi_y= {101,102};
+ vnx2hi vnx2hi_expect= {1,102};
+ vnx2hi vnx2hi_real;
+ merge2(vnx2hi_x,vnx2hi_y,&vnx2hi_real);
+ for(int i=0; i<2; i++)
+ if(vnx2hi_real[i]!=vnx2hi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx2uhi vnx2uhi_x= {1,2};
+ vnx2uhi vnx2uhi_y= {101,102};
+ vnx2uhi vnx2uhi_expect= {1,102};
+ vnx2uhi vnx2uhi_real;
+ merge3(vnx2uhi_x,vnx2uhi_y,&vnx2uhi_real);
+ for(int i=0; i<2; i++)
+ if(vnx2uhi_real[i]!=vnx2uhi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx2hf vnx2hf_x= {1.0,2.0};
+ vnx2hf vnx2hf_y= {1.1,2.1};
+ vnx2hf vnx2hf_expect= {1.0,2.1};
+ vnx2hf vnx2hf_real;
+ merge6(vnx2hf_x,vnx2hf_y,&vnx2hf_real);
+ for(int i=0; i<2; i++)
+ if(vnx2hf_real[i]!=vnx2hf_expect[i]) {
+ __builtin_abort();
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,29 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-options "-O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "merge-7.c"
+
+int main(void)
+{
+ vnx2qi vnx2qi_x= {1,2};
+ vnx2qi vnx2qi_y= {101,102};
+ vnx2qi vnx2qi_expect= {1,102};
+ vnx2qi vnx2qi_real;
+ merge0(vnx2qi_x,vnx2qi_y,&vnx2qi_real);
+ for(int i=0; i<2; i++)
+ if(vnx2qi_real[i]!=vnx2qi_expect[i]) {
+ __builtin_abort();
+ }
+
+ vnx2uqi vnx2uqi_x= {1,2};
+ vnx2uqi vnx2uqi_y= {101,102};
+ vnx2uqi vnx2uqi_expect= {1,102};
+ vnx2uqi vnx2uqi_real;
+ merge1(vnx2uqi_x,vnx2uqi_y,&vnx2uqi_real);
+ for(int i=0; i<2; i++)
+ if(vnx2uqi_real[i]!=vnx2uqi_expect[i]) {
+ __builtin_abort();
+ }
+
+ return 0;
+}