RISC-V: Fix ICE in LRA for LMUL < 1 vector spillings
Checks
Commit Message
From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
gcc/ChangeLog:
* config/riscv/riscv-protos.h (emit_vlmax_vsetvl): Define as global.
(emit_vlmax_op): Ditto.
* config/riscv/riscv-v.cc (get_sew): New function.
(emit_vlmax_vsetvl): Adapt function.
(emit_pred_op): Ditto.
(emit_vlmax_op): Ditto.
(emit_nonvlmax_op): Ditto.
(legitimize_move): Fix LRA ICE.
(gen_no_side_effects_vsetvl_rtx): Adapt function.
* config/riscv/vector.md (@mov<V_FRACT:mode><P:mode>_lra): New pattern.
(@mov<VB:mode><P:mode>_lra): Ditto.
(*mov<V_FRACT:mode><P:mode>_lra): Ditto.
(*mov<VB:mode><P:mode>_lra): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/base/binop_vv_constraint-4.c: Adapt testcase.
* gcc.target/riscv/rvv/base/binop_vv_constraint-6.c: Ditto.
* gcc.target/riscv/rvv/base/binop_vx_constraint-127.c: Ditto.
* gcc.target/riscv/rvv/base/spill-1.c: Ditto.
* gcc.target/riscv/rvv/base/spill-2.c: Ditto.
* gcc.target/riscv/rvv/base/spill-3.c: Ditto.
* gcc.target/riscv/rvv/base/spill-5.c: Ditto.
* gcc.target/riscv/rvv/base/spill-7.c: Ditto.
* g++.target/riscv/rvv/base/bug-18.C: New test.
* gcc.target/riscv/rvv/base/merge_constraint-3.c: New test.
* gcc.target/riscv/rvv/base/merge_constraint-4.c: New test.
---
gcc/config/riscv/riscv-protos.h | 2 +
gcc/config/riscv/riscv-v.cc | 67 +++++--
gcc/config/riscv/vector.md | 56 ++++++
.../g++.target/riscv/rvv/base/bug-18.C | 140 +++++++++++++++
.../riscv/rvv/base/binop_vv_constraint-4.c | 1 +
.../riscv/rvv/base/binop_vv_constraint-6.c | 1 +
.../riscv/rvv/base/binop_vx_constraint-127.c | 2 +-
.../riscv/rvv/base/merge_constraint-3.c | 95 ++++++++++
.../riscv/rvv/base/merge_constraint-4.c | 28 +++
.../gcc.target/riscv/rvv/base/spill-1.c | 168 +++++++++---------
.../gcc.target/riscv/rvv/base/spill-2.c | 112 ++++++------
.../gcc.target/riscv/rvv/base/spill-3.c | 56 +++---
.../gcc.target/riscv/rvv/base/spill-5.c | 26 +--
.../gcc.target/riscv/rvv/base/spill-7.c | 161 +++++++++--------
14 files changed, 636 insertions(+), 279 deletions(-)
create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/bug-18.C
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-4.c
Comments
Committed as https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=cd0c433e5faba9a18f64881cd761a53a530aa798
with comment tweak.
On Wed, Mar 22, 2023 at 10:50 AM <juzhe.zhong@rivai.ai> wrote:
>
> From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-protos.h (emit_vlmax_vsetvl): Define as global.
> (emit_vlmax_op): Ditto.
> * config/riscv/riscv-v.cc (get_sew): New function.
> (emit_vlmax_vsetvl): Adapt function.
> (emit_pred_op): Ditto.
> (emit_vlmax_op): Ditto.
> (emit_nonvlmax_op): Ditto.
> (legitimize_move): Fix LRA ICE.
> (gen_no_side_effects_vsetvl_rtx): Adapt function.
> * config/riscv/vector.md (@mov<V_FRACT:mode><P:mode>_lra): New pattern.
> (@mov<VB:mode><P:mode>_lra): Ditto.
> (*mov<V_FRACT:mode><P:mode>_lra): Ditto.
> (*mov<VB:mode><P:mode>_lra): Ditto.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/base/binop_vv_constraint-4.c: Adapt testcase.
> * gcc.target/riscv/rvv/base/binop_vv_constraint-6.c: Ditto.
> * gcc.target/riscv/rvv/base/binop_vx_constraint-127.c: Ditto.
> * gcc.target/riscv/rvv/base/spill-1.c: Ditto.
> * gcc.target/riscv/rvv/base/spill-2.c: Ditto.
> * gcc.target/riscv/rvv/base/spill-3.c: Ditto.
> * gcc.target/riscv/rvv/base/spill-5.c: Ditto.
> * gcc.target/riscv/rvv/base/spill-7.c: Ditto.
> * g++.target/riscv/rvv/base/bug-18.C: New test.
> * gcc.target/riscv/rvv/base/merge_constraint-3.c: New test.
> * gcc.target/riscv/rvv/base/merge_constraint-4.c: New test.
>
> ---
> gcc/config/riscv/riscv-protos.h | 2 +
> gcc/config/riscv/riscv-v.cc | 67 +++++--
> gcc/config/riscv/vector.md | 56 ++++++
> .../g++.target/riscv/rvv/base/bug-18.C | 140 +++++++++++++++
> .../riscv/rvv/base/binop_vv_constraint-4.c | 1 +
> .../riscv/rvv/base/binop_vv_constraint-6.c | 1 +
> .../riscv/rvv/base/binop_vx_constraint-127.c | 2 +-
> .../riscv/rvv/base/merge_constraint-3.c | 95 ++++++++++
> .../riscv/rvv/base/merge_constraint-4.c | 28 +++
> .../gcc.target/riscv/rvv/base/spill-1.c | 168 +++++++++---------
> .../gcc.target/riscv/rvv/base/spill-2.c | 112 ++++++------
> .../gcc.target/riscv/rvv/base/spill-3.c | 56 +++---
> .../gcc.target/riscv/rvv/base/spill-5.c | 26 +--
> .../gcc.target/riscv/rvv/base/spill-7.c | 161 +++++++++--------
> 14 files changed, 636 insertions(+), 279 deletions(-)
> create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/bug-18.C
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-3.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-4.c
>
> diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
> index f35aaf35b48..060dddbdc22 100644
> --- a/gcc/config/riscv/riscv-protos.h
> +++ b/gcc/config/riscv/riscv-protos.h
> @@ -157,7 +157,9 @@ bool check_builtin_call (location_t, vec<location_t>, unsigned int,
> tree, unsigned int, tree *);
> bool const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
> bool legitimize_move (rtx, rtx, machine_mode);
> +void emit_vlmax_vsetvl (machine_mode, rtx);
> void emit_vlmax_op (unsigned, rtx, rtx, machine_mode);
> +void emit_vlmax_op (unsigned, rtx, rtx, rtx, machine_mode);
> void emit_nonvlmax_op (unsigned, rtx, rtx, rtx, machine_mode);
> enum vlmul_type get_vlmul (machine_mode);
> unsigned int get_ratio (machine_mode);
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index 9b83ef6ea5e..d7b77fd6123 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -98,6 +98,15 @@ private:
> expand_operand m_ops[MAX_OPERANDS];
> };
>
> +static unsigned
> +get_sew (machine_mode mode)
> +{
> + unsigned int sew = GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
> + ? 8
> + : GET_MODE_BITSIZE (GET_MODE_INNER (mode));
> + return sew;
> +}
> +
> /* Return true if X is a const_vector with all duplicate elements, which is in
> the range between MINVAL and MAXVAL. */
> bool
> @@ -109,13 +118,10 @@ const_vec_all_same_in_range_p (rtx x, HOST_WIDE_INT minval,
> && IN_RANGE (INTVAL (elt), minval, maxval));
> }
>
> -static rtx
> -emit_vlmax_vsetvl (machine_mode vmode)
> +void
> +emit_vlmax_vsetvl (machine_mode vmode, rtx vl)
> {
> - rtx vl = gen_reg_rtx (Pmode);
> - unsigned int sew = GET_MODE_CLASS (vmode) == MODE_VECTOR_BOOL
> - ? 8
> - : GET_MODE_BITSIZE (GET_MODE_INNER (vmode));
> + unsigned int sew = get_sew (vmode);
> enum vlmul_type vlmul = get_vlmul (vmode);
> unsigned int ratio = calculate_ratio (sew, vlmul);
>
> @@ -125,8 +131,6 @@ emit_vlmax_vsetvl (machine_mode vmode)
> const0_rtx));
> else
> emit_insn (gen_vlmax_avl (Pmode, vl, gen_int_mode (ratio, Pmode)));
> -
> - return vl;
> }
>
> /* Calculate SEW/LMUL ratio. */
> @@ -166,7 +170,7 @@ calculate_ratio (unsigned int sew, enum vlmul_type vlmul)
> /* Emit an RVV unmask && vl mov from SRC to DEST. */
> static void
> emit_pred_op (unsigned icode, rtx mask, rtx dest, rtx src, rtx len,
> - machine_mode mask_mode)
> + machine_mode mask_mode, bool vlmax_p)
> {
> insn_expander<8> e;
> machine_mode mode = GET_MODE (dest);
> @@ -186,17 +190,18 @@ emit_pred_op (unsigned icode, rtx mask, rtx dest, rtx src, rtx len,
> e.add_input_operand (len, Pmode);
> else
> {
> - rtx vlmax = emit_vlmax_vsetvl (mode);
> + rtx vlmax = gen_reg_rtx (Pmode);
> + emit_vlmax_vsetvl (mode, vlmax);
> e.add_input_operand (vlmax, Pmode);
> }
>
> if (GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL)
> e.add_policy_operand (get_prefer_tail_policy (), get_prefer_mask_policy ());
>
> - if (len)
> - e.add_avl_type_operand (avl_type::NONVLMAX);
> - else
> + if (vlmax_p)
> e.add_avl_type_operand (avl_type::VLMAX);
> + else
> + e.add_avl_type_operand (avl_type::NONVLMAX);
>
> e.expand ((enum insn_code) icode, MEM_P (dest) || MEM_P (src));
> }
> @@ -204,14 +209,21 @@ emit_pred_op (unsigned icode, rtx mask, rtx dest, rtx src, rtx len,
> void
> emit_vlmax_op (unsigned icode, rtx dest, rtx src, machine_mode mask_mode)
> {
> - emit_pred_op (icode, NULL_RTX, dest, src, NULL_RTX, mask_mode);
> + emit_pred_op (icode, NULL_RTX, dest, src, NULL_RTX, mask_mode, true);
> +}
> +
> +void
> +emit_vlmax_op (unsigned icode, rtx dest, rtx src, rtx len,
> + machine_mode mask_mode)
> +{
> + emit_pred_op (icode, NULL_RTX, dest, src, len, mask_mode, true);
> }
>
> void
> emit_nonvlmax_op (unsigned icode, rtx dest, rtx src, rtx len,
> machine_mode mask_mode)
> {
> - emit_pred_op (icode, NULL_RTX, dest, src, len, mask_mode);
> + emit_pred_op (icode, NULL_RTX, dest, src, len, mask_mode, false);
> }
>
> static void
> @@ -265,6 +277,20 @@ legitimize_move (rtx dest, rtx src, machine_mode mask_mode)
> expand_const_vector (dest, src, mask_mode);
> return true;
> }
> +
> + /* In order to decrease the memory traffic, we don't use whole register
> + * load/store for modes with LMUL less than 1 and for mask modes, so such
> + * cases will require one extra general purpose register, but that's not
> + * allowed during the LRA process, so we have a special move pattern used
> + * for LRA, which will defer the expansion until after LRA. */
> + if ((known_lt (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR)
> + || GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
> + && lra_in_progress)
> + {
> + emit_insn (gen_mov_lra (mode, Pmode, dest, src));
> + return true;
> + }
> +
> if (known_ge (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR)
> && GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL)
> {
> @@ -274,6 +300,13 @@ legitimize_move (rtx dest, rtx src, machine_mode mask_mode)
>
> return false;
> }
> +
> + if (register_operand (src, mode) && register_operand (dest, mode))
> + {
> + emit_insn (gen_rtx_SET (dest, src));
> + return true;
> + }
> +
> if (!register_operand (src, mode) && !register_operand (dest, mode))
> {
> rtx tmp = gen_reg_rtx (mode);
> @@ -540,9 +573,7 @@ force_vector_length_operand (rtx vl)
> static rtx
> gen_no_side_effects_vsetvl_rtx (machine_mode vmode, rtx vl, rtx avl)
> {
> - unsigned int sew = GET_MODE_CLASS (vmode) == MODE_VECTOR_BOOL
> - ? 8
> - : GET_MODE_BITSIZE (GET_MODE_INNER (vmode));
> + unsigned int sew = get_sew (vmode);
> return gen_vsetvl_no_side_effects (Pmode, vl, avl, gen_int_mode (sew, Pmode),
> gen_int_mode (get_vlmul (vmode), Pmode),
> const0_rtx, const0_rtx);
> diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
> index 96dc1af5a3d..ebb014aecb1 100644
> --- a/gcc/config/riscv/vector.md
> +++ b/gcc/config/riscv/vector.md
> @@ -635,6 +635,62 @@
> [(set_attr "type" "vmov")
> (set_attr "mode" "<MODE>")])
>
> +(define_expand "@mov<V_FRACT:mode><P:mode>_lra"
> + [(parallel
> + [(set (match_operand:V_FRACT 0 "reg_or_mem_operand")
> + (match_operand:V_FRACT 1 "reg_or_mem_operand"))
> + (clobber (match_scratch:P 2))])]
> + "TARGET_VECTOR && (lra_in_progress || reload_completed)"
> +{})
> +
> +(define_expand "@mov<VB:mode><P:mode>_lra"
> + [(parallel
> + [(set (match_operand:VB 0 "reg_or_mem_operand")
> + (match_operand:VB 1 "reg_or_mem_operand"))
> + (clobber (match_scratch:P 2))])]
> + "TARGET_VECTOR && (lra_in_progress || reload_completed)"
> +{})
> +
> +(define_insn_and_split "*mov<V_FRACT:mode><P:mode>_lra"
> + [(set (match_operand:V_FRACT 0 "reg_or_mem_operand" "=vr, m,vr")
> + (match_operand:V_FRACT 1 "reg_or_mem_operand" " m,vr,vr"))
> + (clobber (match_scratch:P 2 "=&r,&r,X"))]
> + "TARGET_VECTOR && (lra_in_progress || reload_completed)"
> + "#"
> + "&& reload_completed"
> + [(const_int 0)]
> +{
> + if (REG_P (operands[0]) && REG_P (operands[1]))
> + emit_insn (gen_rtx_SET (operands[0], operands[1]));
> + else
> + {
> + riscv_vector::emit_vlmax_vsetvl (<V_FRACT:MODE>mode, operands[2]);
> + riscv_vector::emit_vlmax_op (code_for_pred_mov (<V_FRACT:MODE>mode),
> + operands[0], operands[1], operands[2], <VM>mode);
> + }
> + DONE;
> +})
> +
> +(define_insn_and_split "*mov<VB:mode><P:mode>_lra"
> + [(set (match_operand:VB 0 "reg_or_mem_operand" "=vr, m,vr")
> + (match_operand:VB 1 "reg_or_mem_operand" " m,vr,vr"))
> + (clobber (match_scratch:P 2 "=&r,&r,X"))]
> + "TARGET_VECTOR && (lra_in_progress || reload_completed)"
> + "#"
> + "&& reload_completed"
> + [(const_int 0)]
> +{
> + if (REG_P (operands[0]) && REG_P (operands[1]))
> + emit_insn (gen_rtx_SET (operands[0], operands[1]));
> + else
> + {
> + riscv_vector::emit_vlmax_vsetvl (<VB:MODE>mode, operands[2]);
> + riscv_vector::emit_vlmax_op (code_for_pred_mov (<VB:MODE>mode),
> + operands[0], operands[1], operands[2], <VB:MODE>mode);
> + }
> + DONE;
> +})
> +
> ;; -----------------------------------------------------------------
> ;; ---- Duplicate Operations
> ;; -----------------------------------------------------------------
> diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/bug-18.C b/gcc/testsuite/g++.target/riscv/rvv/base/bug-18.C
> new file mode 100644
> index 00000000000..d6088338dbc
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/riscv/rvv/base/bug-18.C
> @@ -0,0 +1,140 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +
> +#include <iostream>
> +#include "riscv_vector.h"
> +using std::cerr;
> +using std::endl;
> +template < class , class b > int c(b val) {
> + return val;
> +}
> +auto &f32(c< float, uint32_t >);
> +template < class d >
> +bool check(d , d , size_t );
> +int main() {
> + size_t e ;
> + int16_t f[] {};
> + size_t g ;
> + int32_t i[] {4784};
> + size_t aa = 4;
> + int16_t ab[] {2313};
> + int16_t j[] {7114 };
> + int16_t k[] {7696 };
> + uint32_t l[] {9951 };
> + int32_t m[] {2659 };
> + uint16_t n[] {7537 };
> + int32_t o[] {05733}
> + ;
> + uint32_t p[] {7010090 };
> + uint32_t q[] {21060 };
> + uint32_t r[] {2273 };
> + uint32_t s[] {4094366 };
> + int16_t ac[] {11880 };
> + int16_t t[] {10988};
> + int16_t ad[] {30376};
> + int8_t u[] {};
> + int8_t ae[] {7};
> + int8_t v[] {40};
> + int8_t af[] {6};
> + int16_t w[] {4077 };
> + int16_t x[] {7932 };
> + int8_t y[] {3};
> + int8_t z[] {4};
> + uint16_t ag[] {2831};
> + int16_t ah[] {10412 };
> + int16_t ai[] {6823};
> + int32_t aj[] {8572 };
> + int32_t ak[] {9999 };
> + uint32_t al[] {50166962 };
> + uint32_t am[] {9781 };
> + int8_t an[] {9, 35};
> + float ao[] {222.65, 22.79};
> + float ap[] {126.10, 13.92};
> + int64_t aq[] {508727, 5556};
> + int16_t ar[] {2861 };
> + int16_t as[] {21420};
> + int16_t at[] {4706 };
> + uint32_t au ;
> + uint32_t av = 600295662;
> + size_t aw ;
> + int16_t ax = 13015;
> + uint32_t ay ;
> + uint16_t az = 10652;
> + int32_t ba ;
> + int8_t bb ;
> + int64_t bc = 40183771683589512;
> +
> +asm volatile ("ttt":::"memory");
> + vint16mf4_t bd = __riscv_vle16_v_i16mf4(j, 2);
> + vuint32mf2_t be = __riscv_vle32_v_u32mf2(l, 2);
> + vint32mf2_t bf = __riscv_vle32_v_i32mf2(m, 2);
> + vuint16mf4_t bg = __riscv_vle16_v_u16mf4(n, 2);
> + vint8mf4_t bh ;
> + vuint32m2_t bi = __riscv_vle32_v_u32m2(p, 2);
> + vuint32m2_t bj = __riscv_vle32_v_u32m2(q, 2);
> + vuint32m2_t bk = __riscv_vle32_v_u32m2(r, 2);
> + vuint32m2_t bl = __riscv_vle32_v_u32m2(s, 2);
> + vint16m1_t bm = __riscv_vle16_v_i16m1(ac, 2);
> + vint16m1_t bn = __riscv_vle16_v_i16m1(t, 2);
> + vint8mf2_t bo = __riscv_vle8_v_i8mf2(u, 1);
> + vint8mf2_t bp = __riscv_vle8_v_i8mf2(ae, 1);
> + vint8mf8_t bq = __riscv_vle8_v_i8mf8(af, 1);
> + vint16mf4_t br = __riscv_vle16_v_i16mf4(w, 2);
> + vint16mf4_t bs = __riscv_vle16_v_i16mf4(x, 2);
> + vint8mf8_t bt = __riscv_vle8_v_i8mf8(y, 1);
> + vint8mf8_t bu = __riscv_vle8_v_i8mf8(z, 1);
> + vuint16mf4_t bv = __riscv_vle16_v_u16mf4(ag, 1);
> + vint16mf4_t bw = __riscv_vle16_v_i16mf4(ah, 2);
> + vint16mf4_t bx = __riscv_vle16_v_i16mf4(ai, 2);
> + vint32mf2_t by = __riscv_vle32_v_i32mf2(aj, 2);
> + vint32mf2_t bz = __riscv_vle32_v_i32mf2(ak, 2);
> + vuint32mf2_t ca = __riscv_vle32_v_u32mf2(al, 2);
> + vuint32mf2_t cb = __riscv_vle32_v_u32mf2(am, 2);
> + vint8mf8_t cc = __riscv_vle8_v_i8mf8(an, 2);
> + vfloat32mf2_t cd = __riscv_vle32_v_f32mf2(ao, 2);
> + vfloat32mf2_t ce = __riscv_vle32_v_f32mf2(ap, 2);
> + vint64m1_t cf = __riscv_vle64_v_i64m1(aq, 2);
> + vint16mf4_t cg = __riscv_vle16_v_i16mf4(ar, 2);
> + vint16mf4_t ch = __riscv_vle16_v_i16mf4(as, 2);
> + vint16mf4_t var_62 = __riscv_vle16_v_i16mf4(at, 2);
> + vbool64_t var_20 = __riscv_vmadc_vx_u32mf2_b64(be, ay, 2);
> + int8_t var_17 = __riscv_vmv_x_s_i8mf4_i8(bh);
> + vbool16_t var_28 = __riscv_vmsltu_vv_u32m2_b16(bk, bl, 2);
> + vint8mf2_t var_14 = __riscv_vadd_vv_i8mf2(bo, bp, 1);
> + vbool64_t var_8 = __riscv_vmseq_vv_i16mf4_b64(br, bs, 2);
> + vbool64_t var_42 = __riscv_vmsbc_vx_u16mf4_b64(bv, az, 1);
> + vbool64_t var_46 = __riscv_vmsge_vx_i32mf2_b64(by, ba, 2);
> + vint16mf4_t var_4 = __riscv_vncvt_x_x_w_i16mf4(bz, 2);
> + vbool64_t var_51 = __riscv_vmsgt_vx_i8mf8_b64(cc, bb, 2);
> + vbool64_t var_56 = __riscv_vmfne_vv_f32mf2_b64(cd, ce, 2);
> + vbool64_t var_55 = __riscv_vmseq_vx_i64m1_b64(cf, bc, 2);
> + vuint32m2_t var_16 = __riscv_vslideup_vx_u32m2_mu(var_28, bi, bj, aw, 2);
> + vint8mf2_t var_12 = __riscv_vmulh_vv_i8mf2(var_14, var_14, 1);
> + vint16mf4_t var_0 = __riscv_vdiv_vv_i16mf4_mu(var_8, var_4, ch, var_62, 2);
> + vuint32m2_t var_13 = __riscv_vsub_vx_u32m2(var_16, av, 2);
> + int8_t var_9 = __riscv_vmv_x_s_i8mf2_i8(var_12);
> + vint16mf4_t var_19 = __riscv_vor_vx_i16mf4_mu(var_20, var_0, bd, ax, 2);
> + uint32_t var_10 = __riscv_vmv_x_s_u32m2_u32(var_13);
> + vint8mf8_t var_7 = __riscv_vmadd_vx_i8mf8_mu(var_42, bt, var_9, bu, 1);
> + __riscv_vse16_v_i16mf4(k, var_19, 2);
> + vuint32mf2_t var_3 =
> + __riscv_vslide1down_vx_u32mf2_mu(var_51, ca, cb, var_10, 2);
> + if (check(k, ab, aa))
> + cerr << "check 8 fails" << endl;
> + vbool64_t var_2 = __riscv_vmsne_vx_u32mf2_b64_mu(var_55, var_56, var_3, au, 2);
> + vint16mf4_t var_1 = __riscv_vssub_vv_i16mf4_mu(var_2, var_0, var_4, cg, 2);
> + vint16mf4_t var_5 = __riscv_vxor_vv_i16mf4_mu(var_46, var_1, bw, bx, 2);
> + vint32mf2_t var_18 = __riscv_vwmaccsu_vv_i32mf2(bf, var_1, bg, 2);
> + vint8mf8_t var_6 = __riscv_vncvt_x_x_w_i8mf8_mu(var_8, var_7, var_5, 1);
> + vint16m1_t var_15 = __riscv_vredand_vs_i16mf4_i16m1_tu(bm, var_5, bn, 2);
> + __riscv_vse32_v_i32mf2(o, var_18, 2);
> + vbool64_t var_11 = __riscv_vmsge_vx_i8mf8_b64(var_6, var_17, 1);
> + __riscv_vse16_v_i16m1(ad, var_15, 1);
> + if (check(o, i, g))
> + cerr << "check 1 fails" << endl;
> + __riscv_vse8_v_i8mf8_m(var_11, v, bq, 1);
> + if (check(ad, f, e))
> + cerr << "check 4 fails" << endl;
> + cerr << "check 7 fails" << endl;
> + return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-4.c b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-4.c
> index 1b0afed037a..552c264d895 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-4.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-4.c
> @@ -24,3 +24,4 @@ void f2 (void * in, void *out, int32_t x)
> __riscv_vsm_v_b32 (out, m4, 4);
> }
>
> +/* { dg-final { scan-assembler-times {vmv} 2 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-6.c
> index 384e2301a69..6a65fb576e8 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-6.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-6.c
> @@ -24,4 +24,5 @@ void f2 (void * in, void *out, int32_t x)
> __riscv_vsm_v_b32 (out, m4, 4);
> }
>
> +/* { dg-final { scan-assembler-times {vmv} 2 } } */
>
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-127.c b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-127.c
> index a353a7ab2d5..3933c35f4ce 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-127.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-127.c
> @@ -24,4 +24,4 @@ void f2 (void * in, void *out, int32_t x)
> __riscv_vsm_v_b32 (out, m4, 4);
> }
>
> -
> +/* { dg-final { scan-assembler-times {vmv} 2 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-3.c b/gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-3.c
> new file mode 100644
> index 00000000000..d9cbc853918
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-3.c
> @@ -0,0 +1,95 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
> +{
> + vuint16m1_t v1 = __riscv_vle16_v_u16m1 (base1, vl);
> + vuint16m1_t v2 = __riscv_vle16_v_u16m1 (base2, vl);
> + vbool16_t m1 = __riscv_vlm_v_b16 (base3, vl);
> + vbool16_t m2 = __riscv_vlm_v_b16 (base4, vl);
> + asm volatile("#" ::
> + : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
> + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
> + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
> + "v26", "v27", "v28");
> +
> + vbool16_t v = __riscv_vmseq_vv_u16m1_b16_mu(m1,m2,v1,v2,vl);
> + asm volatile("#" ::
> + : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
> + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
> + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
> + "v26", "v27");
> +
> + __riscv_vsm_v_b16 (out,v,vl);
> +}
> +
> +void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t shift)
> +{
> + vuint16m1_t v1 = __riscv_vle16_v_u16m1 (base1, vl);
> + vbool16_t m1 = __riscv_vlm_v_b16 (base3, vl);
> + vbool16_t m2 = __riscv_vlm_v_b16 (base4, vl);
> + asm volatile("#" ::
> + : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
> + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
> + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
> + "v26", "v27", "v28", "v29");
> +
> + vbool16_t v = __riscv_vmseq_vx_u16m1_b16_mu(m1,m2,v1,shift,vl);
> + asm volatile("#" ::
> + : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
> + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
> + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
> + "v26", "v27", "v28", "v29");
> +
> +
> + __riscv_vsm_v_b16 (out,v,vl);
> +}
> +
> +void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
> +{
> + vuint16m1_t v1 = __riscv_vle16_v_u16m1 (base1, vl);
> + vuint16m1_t v2 = __riscv_vle16_v_u16m1 (base2, vl);
> + vbool16_t m1 = __riscv_vlm_v_b16 (base3, vl);
> + vbool16_t m2 = __riscv_vlm_v_b16 (base4, vl);
> + asm volatile("#" ::
> + : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
> + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
> + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
> + "v26", "v27", "v28");
> +
> + vbool16_t v = __riscv_vmsltu_vv_u16m1_b16_mu(m1,m2,v1,v2,vl);
> + asm volatile("#" ::
> + : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
> + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
> + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
> + "v26", "v27");
> +
> + __riscv_vsm_v_b16 (out,v,vl);
> +}
> +
> +void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t shift)
> +{
> + vuint16m1_t v1 = __riscv_vle16_v_u16m1 (base1, vl);
> + vbool16_t m1 = __riscv_vlm_v_b16 (base3, vl);
> + vbool16_t m2 = __riscv_vlm_v_b16 (base4, vl);
> + asm volatile("#" ::
> + : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
> + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
> + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
> + "v26", "v27", "v28", "v29");
> +
> + vbool16_t v = __riscv_vmsltu_vx_u16m1_b16_mu(m1,m2,v1,shift,vl);
> + asm volatile("#" ::
> + : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
> + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
> + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
> + "v26", "v27", "v28", "v29");
> +
> +
> + __riscv_vsm_v_b16 (out,v,vl);
> +}
> +
> +/* { dg-final { scan-assembler-not {vmv} } } */
> +/* { dg-final { scan-assembler-not {csrr} } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-4.c b/gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-4.c
> new file mode 100644
> index 00000000000..db245b02570
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-4.c
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +void f (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t shift)
> +{
> + vuint16m1_t v1 = __riscv_vle16_v_u16m1 (base1, vl);
> + vbool16_t m1 = __riscv_vlm_v_b16 (base3, vl);
> + vbool16_t m2 = __riscv_vlm_v_b16 (base4, vl);
> + asm volatile("#" ::
> + : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
> + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
> + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
> + "v26", "v27", "v28", "v29");
> +
> + vbool16_t v = __riscv_vmsltu_vx_u16m1_b16_mu(m1,m2,v1,shift,vl);
> + asm volatile("#" ::
> + : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
> + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
> + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
> + "v26", "v27", "v28", "v29", "v30", "v31");
> +
> +
> + __riscv_vsm_v_b16 (out,v,vl);
> +}
> +
> +/* { dg-final { scan-assembler-times {vmv} 1 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c
> index ec38a828ee7..2f2d85807ec 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c
> @@ -10,20 +10,20 @@
> ** csrr\tt0,vlenb
> ** sub\tsp,sp,t0
> ** ...
> -** csrr\ta2,vlenb
> -** srli\ta2,a2,3
> -** slli\ta3,a2,3
> -** sub\ta3,a3,a2
> -** add\ta3,a3,sp
> -** vse8.v\tv[0-9]+,0\(a3\)
> -** ...
> -** csrr\ta2,vlenb
> -** srli\ta2,a2,3
> -** slli\ta3,a2,3
> -** sub\ta3,a3,a2
> -** add\ta3,a3,sp
> -** vle8.v\tv[0-9]+,0\(a3\)
> -** vse8.v\tv[0-9]+,0\(a1\)
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,3
> +** slli\t[a-x0-9]+,[a-x0-9]+,3
> +** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
> +** add\t[a-x0-9]+,[a-x0-9]+,sp
> +** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** ...
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,3
> +** slli\t[a-x0-9]+,[a-x0-9]+,3
> +** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
> +** add\t[a-x0-9]+,[a-x0-9]+,sp
> +** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
> ** csrr\tt0,vlenb
> ** add\tsp,sp,t0
> ** ...
> @@ -43,20 +43,20 @@ spill_1 (int8_t *in, int8_t *out)
> ** sub\tsp,sp,t0
> ** vsetvli\ta5,zero,e8,mf4,ta,ma
> ** vle8.v\tv[0-9]+,0\(a0\)
> -** csrr\ta2,vlenb
> -** srli\ta2,a2,2
> -** slli\ta3,a2,2
> -** sub\ta3,a3,a2
> -** add\ta3,a3,sp
> -** vse8.v\tv[0-9]+,0\(a3\)
> -** ...
> -** csrr\ta2,vlenb
> -** srli\ta2,a2,2
> -** slli\ta3,a2,2
> -** sub\ta3,a3,a2
> -** add\ta3,a3,sp
> -** vle8.v\tv[0-9]+,0\(a3\)
> -** vse8.v\tv[0-9]+,0\(a1\)
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,2
> +** slli\t[a-x0-9]+,[a-x0-9]+,2
> +** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
> +** add\t[a-x0-9]+,[a-x0-9]+,sp
> +** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** ...
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,2
> +** slli\t[a-x0-9]+,[a-x0-9]+,2
> +** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
> +** add\t[a-x0-9]+,[a-x0-9]+,sp
> +** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
> ** csrr\tt0,vlenb
> ** add\tsp,sp,t0
> ** ...
> @@ -76,16 +76,16 @@ spill_2 (int8_t *in, int8_t *out)
> ** sub\tsp,sp,t0
> ** vsetvli\ta5,zero,e8,mf2,ta,ma
> ** vle8.v\tv[0-9]+,0\(a0\)
> -** csrr\ta3,vlenb
> -** srli\ta3,a3,1
> -** add\ta3,a3,sp
> -** vse8.v\tv[0-9]+,0\(a3\)
> -** ...
> -** csrr\ta3,vlenb
> -** srli\ta3,a3,1
> -** add\ta3,a3,sp
> -** vle8.v\tv[0-9]+,0\(a3\)
> -** vse8.v\tv[0-9]+,0\(a1\)
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,1
> +** add\t[a-x0-9]+,[a-x0-9]+,sp
> +** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** ...
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,1
> +** add\t[a-x0-9]+,[a-x0-9]+,sp
> +** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
> ** csrr\tt0,vlenb
> ** add\tsp,sp,t0
> ** ...
> @@ -107,7 +107,7 @@ spill_3 (int8_t *in, int8_t *out)
> ** vs1r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl1re8.v\tv2,0\(sp\)
> -** vs1r.v\tv2,0\(a1\)
> +** vs1r.v\tv2,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> @@ -131,7 +131,7 @@ spill_4 (int8_t *in, int8_t *out)
> ** vs2r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl2re8.v\tv4,0\(sp\)
> -** vs2r.v\tv4,0\(a1\)
> +** vs2r.v\tv4,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> @@ -155,7 +155,7 @@ spill_5 (int8_t *in, int8_t *out)
> ** vs4r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl4re8.v\tv8,0\(sp\)
> -** vs4r.v\tv8,0\(a1\)
> +** vs4r.v\tv8,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> @@ -179,7 +179,7 @@ spill_6 (int8_t *in, int8_t *out)
> ** vs8r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl8re8.v\tv16,0\(sp\)
> -** vs8r.v\tv16,0\(a1\)
> +** vs8r.v\tv16,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> @@ -200,20 +200,20 @@ spill_7 (int8_t *in, int8_t *out)
> ** sub\tsp,sp,t0
> ** vsetvli\ta5,zero,e8,mf8,ta,ma
> ** vle8.v\tv[0-9]+,0\(a0\)
> -** csrr\ta2,vlenb
> -** srli\ta2,a2,3
> -** slli\ta3,a2,3
> -** sub\ta3,a3,a2
> -** add\ta3,a3,sp
> -** vse8.v\tv[0-9]+,0\(a3\)
> -** ...
> -** csrr\ta2,vlenb
> -** srli\ta2,a2,3
> -** slli\ta3,a2,3
> -** sub\ta3,a3,a2
> -** add\ta3,a3,sp
> -** vle8.v\tv[0-9]+,0\(a3\)
> -** vse8.v\tv[0-9]+,0\(a1\)
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,3
> +** slli\t[a-x0-9]+,[a-x0-9]+,3
> +** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
> +** add\t[a-x0-9]+,[a-x0-9]+,sp
> +** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** ...
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,3
> +** slli\t[a-x0-9]+,[a-x0-9]+,3
> +** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
> +** add\t[a-x0-9]+,[a-x0-9]+,sp
> +** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
> ** csrr\tt0,vlenb
> ** add\tsp,sp,t0
> ** ...
> @@ -233,20 +233,20 @@ spill_8 (uint8_t *in, uint8_t *out)
> ** sub\tsp,sp,t0
> ** vsetvli\ta5,zero,e8,mf4,ta,ma
> ** vle8.v\tv[0-9]+,0\(a0\)
> -** csrr\ta2,vlenb
> -** srli\ta2,a2,2
> -** slli\ta3,a2,2
> -** sub\ta3,a3,a2
> -** add\ta3,a3,sp
> -** vse8.v\tv[0-9]+,0\(a3\)
> -** ...
> -** csrr\ta2,vlenb
> -** srli\ta2,a2,2
> -** slli\ta3,a2,2
> -** sub\ta3,a3,a2
> -** add\ta3,a3,sp
> -** vle8.v\tv[0-9]+,0\(a3\)
> -** vse8.v\tv[0-9]+,0\(a1\)
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,2
> +** slli\t[a-x0-9]+,[a-x0-9]+,2
> +** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
> +** add\t[a-x0-9]+,[a-x0-9]+,sp
> +** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** ...
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,2
> +** slli\t[a-x0-9]+,[a-x0-9]+,2
> +** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
> +** add\t[a-x0-9]+,[a-x0-9]+,sp
> +** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
> ** csrr\tt0,vlenb
> ** add\tsp,sp,t0
> ** ...
> @@ -266,16 +266,16 @@ spill_9 (uint8_t *in, uint8_t *out)
> ** sub\tsp,sp,t0
> ** vsetvli\ta5,zero,e8,mf2,ta,ma
> ** vle8.v\tv[0-9]+,0\(a0\)
> -** csrr\ta3,vlenb
> -** srli\ta3,a3,1
> -** add\ta3,a3,sp
> -** vse8.v\tv[0-9]+,0\(a3\)
> -** ...
> -** csrr\ta3,vlenb
> -** srli\ta3,a3,1
> -** add\ta3,a3,sp
> -** vle8.v\tv[0-9]+,0\(a3\)
> -** vse8.v\tv[0-9]+,0\(a1\)
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,1
> +** add\t[a-x0-9]+,[a-x0-9]+,sp
> +** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** ...
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,1
> +** add\t[a-x0-9]+,[a-x0-9]+,sp
> +** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
> ** csrr\tt0,vlenb
> ** add\tsp,sp,t0
> ** ...
> @@ -297,7 +297,7 @@ spill_10 (uint8_t *in, uint8_t *out)
> ** vs1r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl1re8.v\tv2,0\(sp\)
> -** vs1r.v\tv2,0\(a1\)
> +** vs1r.v\tv2,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> @@ -321,7 +321,7 @@ spill_11 (uint8_t *in, uint8_t *out)
> ** vs2r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl2re8.v\tv4,0\(sp\)
> -** vs2r.v\tv4,0\(a1\)
> +** vs2r.v\tv4,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> @@ -345,7 +345,7 @@ spill_12 (uint8_t *in, uint8_t *out)
> ** vs4r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl4re8.v\tv8,0\(sp\)
> -** vs4r.v\tv8,0\(a1\)
> +** vs4r.v\tv8,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> @@ -369,7 +369,7 @@ spill_13 (uint8_t *in, uint8_t *out)
> ** vs8r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl8re8.v\tv16,0\(sp\)
> -** vs8r.v\tv16,0\(a1\)
> +** vs8r.v\tv16,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c
> index 147a727b134..4bcaf4dce79 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c
> @@ -11,20 +11,20 @@
> ** sub\tsp,sp,t0
> ** vsetvli\ta5,zero,e16,mf4,ta,ma
> ** vle16.v\tv[0-9]+,0\(a0\)
> -** csrr\ta2,vlenb
> -** srli\ta2,a2,2
> -** slli\ta3,a2,2
> -** sub\ta3,a3,a2
> -** add\ta3,a3,sp
> -** vse16.v\tv[0-9]+,0\(a3\)
> -** ...
> -** csrr\ta2,vlenb
> -** srli\ta2,a2,2
> -** slli\ta3,a2,2
> -** sub\ta3,a3,a2
> -** add\ta3,a3,sp
> -** vle16.v\tv[0-9]+,0\(a3\)
> -** vse16.v\tv[0-9]+,0\(a1\)
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,2
> +** slli\t[a-x0-9]+,[a-x0-9]+,2
> +** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
> +** add\t[a-x0-9]+,[a-x0-9]+,sp
> +** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
> +** ...
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,2
> +** slli\t[a-x0-9]+,[a-x0-9]+,2
> +** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
> +** add\t[a-x0-9]+,[a-x0-9]+,sp
> +** vle16.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
> ** csrr\tt0,vlenb
> ** add\tsp,sp,t0
> ** ...
> @@ -44,16 +44,16 @@ spill_2 (int16_t *in, int16_t *out)
> ** sub\tsp,sp,t0
> ** vsetvli\ta5,zero,e16,mf2,ta,ma
> ** vle16.v\tv[0-9]+,0\(a0\)
> -** csrr\ta3,vlenb
> -** srli\ta3,a3,1
> -** add\ta3,a3,sp
> -** vse16.v\tv[0-9]+,0\(a3\)
> -** ...
> -** csrr\ta3,vlenb
> -** srli\ta3,a3,1
> -** add\ta3,a3,sp
> -** vle16.v\tv[0-9]+,0\(a3\)
> -** vse16.v\tv[0-9]+,0\(a1\)
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,1
> +** add\t[a-x0-9]+,[a-x0-9]+,sp
> +** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
> +** ...
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,1
> +** add\t[a-x0-9]+,[a-x0-9]+,sp
> +** vle16.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
> ** csrr\tt0,vlenb
> ** add\tsp,sp,t0
> ** ...
> @@ -75,7 +75,7 @@ spill_3 (int16_t *in, int16_t *out)
> ** vs1r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl1re16.v\tv2,0\(sp\)
> -** vs1r.v\tv2,0\(a1\)
> +** vs1r.v\tv2,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> @@ -99,7 +99,7 @@ spill_4 (int16_t *in, int16_t *out)
> ** vs2r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl2re16.v\tv4,0\(sp\)
> -** vs2r.v\tv4,0\(a1\)
> +** vs2r.v\tv4,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> @@ -123,7 +123,7 @@ spill_5 (int16_t *in, int16_t *out)
> ** vs4r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl4re16.v\tv8,0\(sp\)
> -** vs4r.v\tv8,0\(a1\)
> +** vs4r.v\tv8,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> @@ -147,7 +147,7 @@ spill_6 (int16_t *in, int16_t *out)
> ** vs8r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl8re16.v\tv16,0\(sp\)
> -** vs8r.v\tv16,0\(a1\)
> +** vs8r.v\tv16,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> @@ -168,20 +168,20 @@ spill_7 (int16_t *in, int16_t *out)
> ** sub\tsp,sp,t0
> ** vsetvli\ta5,zero,e16,mf4,ta,ma
> ** vle16.v\tv[0-9]+,0\(a0\)
> -** csrr\ta2,vlenb
> -** srli\ta2,a2,2
> -** slli\ta3,a2,2
> -** sub\ta3,a3,a2
> -** add\ta3,a3,sp
> -** vse16.v\tv[0-9]+,0\(a3\)
> -** ...
> -** csrr\ta2,vlenb
> -** srli\ta2,a2,2
> -** slli\ta3,a2,2
> -** sub\ta3,a3,a2
> -** add\ta3,a3,sp
> -** vle16.v\tv[0-9]+,0\(a3\)
> -** vse16.v\tv[0-9]+,0\(a1\)
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,2
> +** slli\t[a-x0-9]+,[a-x0-9]+,2
> +** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
> +** add\t[a-x0-9]+,[a-x0-9]+,sp
> +** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
> +** ...
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,2
> +** slli\t[a-x0-9]+,[a-x0-9]+,2
> +** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
> +** add\t[a-x0-9]+,[a-x0-9]+,sp
> +** vle16.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
> ** csrr\tt0,vlenb
> ** add\tsp,sp,t0
> ** ...
> @@ -201,16 +201,16 @@ spill_9 (uint16_t *in, uint16_t *out)
> ** sub\tsp,sp,t0
> ** vsetvli\ta5,zero,e16,mf2,ta,ma
> ** vle16.v\tv[0-9]+,0\(a0\)
> -** csrr\ta3,vlenb
> -** srli\ta3,a3,1
> -** add\ta3,a3,sp
> -** vse16.v\tv[0-9]+,0\(a3\)
> -** ...
> -** csrr\ta3,vlenb
> -** srli\ta3,a3,1
> -** add\ta3,a3,sp
> -** vle16.v\tv[0-9]+,0\(a3\)
> -** vse16.v\tv[0-9]+,0\(a1\)
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,1
> +** add\t[a-x0-9]+,[a-x0-9]+,sp
> +** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
> +** ...
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,1
> +** add\t[a-x0-9]+,[a-x0-9]+,sp
> +** vle16.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
> ** csrr\tt0,vlenb
> ** add\tsp,sp,t0
> ** ...
> @@ -232,7 +232,7 @@ spill_10 (uint16_t *in, uint16_t *out)
> ** vs1r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl1re16.v\tv2,0\(sp\)
> -** vs1r.v\tv2,0\(a1\)
> +** vs1r.v\tv2,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> @@ -256,7 +256,7 @@ spill_11 (uint16_t *in, uint16_t *out)
> ** vs2r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl2re16.v\tv4,0\(sp\)
> -** vs2r.v\tv4,0\(a1\)
> +** vs2r.v\tv4,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> @@ -280,7 +280,7 @@ spill_12 (uint16_t *in, uint16_t *out)
> ** vs4r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl4re16.v\tv8,0\(sp\)
> -** vs4r.v\tv8,0\(a1\)
> +** vs4r.v\tv8,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> @@ -304,7 +304,7 @@ spill_13 (uint16_t *in, uint16_t *out)
> ** vs8r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl8re16.v\tv16,0\(sp\)
> -** vs8r.v\tv16,0\(a1\)
> +** vs8r.v\tv16,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c
> index 81d695a2a73..82d685e029d 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c
> @@ -11,16 +11,16 @@
> ** sub\tsp,sp,t0
> ** vsetvli\ta5,zero,e32,mf2,ta,ma
> ** vle32.v\tv[0-9]+,0\(a0\)
> -** csrr\ta3,vlenb
> -** srli\ta3,a3,1
> -** add\ta3,a3,sp
> -** vse32.v\tv[0-9]+,0\(a3\)
> -** ...
> -** csrr\ta3,vlenb
> -** srli\ta3,a3,1
> -** add\ta3,a3,sp
> -** vle32.v\tv[0-9]+,0\(a3\)
> -** vse32.v\tv[0-9]+,0\(a1\)
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,1
> +** add\t[a-x0-9]+,[a-x0-9]+,sp
> +** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
> +** ...
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,1
> +** add\t[a-x0-9]+,[a-x0-9]+,sp
> +** vle32.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
> ** csrr\tt0,vlenb
> ** add\tsp,sp,t0
> ** ...
> @@ -42,7 +42,7 @@ spill_3 (int32_t *in, int32_t *out)
> ** vs1r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl1re32.v\tv2,0\(sp\)
> -** vs1r.v\tv2,0\(a1\)
> +** vs1r.v\tv2,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> @@ -66,7 +66,7 @@ spill_4 (int32_t *in, int32_t *out)
> ** vs2r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl2re32.v\tv4,0\(sp\)
> -** vs2r.v\tv4,0\(a1\)
> +** vs2r.v\tv4,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> @@ -90,7 +90,7 @@ spill_5 (int32_t *in, int32_t *out)
> ** vs4r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl4re32.v\tv8,0\(sp\)
> -** vs4r.v\tv8,0\(a1\)
> +** vs4r.v\tv8,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> @@ -114,7 +114,7 @@ spill_6 (int32_t *in, int32_t *out)
> ** vs8r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl8re32.v\tv16,0\(sp\)
> -** vs8r.v\tv16,0\(a1\)
> +** vs8r.v\tv16,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> @@ -135,16 +135,16 @@ spill_7 (int32_t *in, int32_t *out)
> ** sub\tsp,sp,t0
> ** vsetvli\ta5,zero,e32,mf2,ta,ma
> ** vle32.v\tv[0-9]+,0\(a0\)
> -** csrr\ta3,vlenb
> -** srli\ta3,a3,1
> -** add\ta3,a3,sp
> -** vse32.v\tv[0-9]+,0\(a3\)
> -** ...
> -** csrr\ta3,vlenb
> -** srli\ta3,a3,1
> -** add\ta3,a3,sp
> -** vle32.v\tv[0-9]+,0\(a3\)
> -** vse32.v\tv[0-9]+,0\(a1\)
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,1
> +** add\t[a-x0-9]+,[a-x0-9]+,sp
> +** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
> +** ...
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,1
> +** add\t[a-x0-9]+,[a-x0-9]+,sp
> +** vle32.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
> ** csrr\tt0,vlenb
> ** add\tsp,sp,t0
> ** ...
> @@ -166,7 +166,7 @@ spill_10 (uint32_t *in, uint32_t *out)
> ** vs1r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl1re32.v\tv2,0\(sp\)
> -** vs1r.v\tv2,0\(a1\)
> +** vs1r.v\tv2,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> @@ -190,7 +190,7 @@ spill_11 (uint32_t *in, uint32_t *out)
> ** vs2r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl2re32.v\tv4,0\(sp\)
> -** vs2r.v\tv4,0\(a1\)
> +** vs2r.v\tv4,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> @@ -214,7 +214,7 @@ spill_12 (uint32_t *in, uint32_t *out)
> ** vs4r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl4re32.v\tv8,0\(sp\)
> -** vs4r.v\tv8,0\(a1\)
> +** vs4r.v\tv8,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> @@ -238,7 +238,7 @@ spill_13 (uint32_t *in, uint32_t *out)
> ** vs8r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl8re32.v\tv16,0\(sp\)
> -** vs8r.v\tv16,0\(a1\)
> +** vs8r.v\tv16,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c
> index 8ec7a2d4b2b..5b3f75f3552 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c
> @@ -11,16 +11,16 @@
> ** sub\tsp,sp,t0
> ** vsetvli\ta5,zero,e32,mf2,ta,ma
> ** vle32.v\tv[0-9]+,0\(a0\)
> -** csrr\ta3,vlenb
> -** srli\ta3,a3,1
> -** add\ta3,a3,sp
> -** vse32.v\tv[0-9]+,0\(a3\)
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,1
> +** add\t[a-x0-9]+,[a-x0-9]+,sp
> +** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
> ** ...
> -** csrr\ta3,vlenb
> -** srli\ta3,a3,1
> -** add\ta3,a3,sp
> -** vle32.v\tv[0-9]+,0\(a3\)
> -** vse32.v\tv[0-9]+,0\(a1\)
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,1
> +** add\t[a-x0-9]+,[a-x0-9]+,sp
> +** vle32.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
> ** csrr\tt0,vlenb
> ** add\tsp,sp,t0
> ** ...
> @@ -42,7 +42,7 @@ spill_3 (float *in, float *out)
> ** vs1r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl1re32.v\tv2,0\(sp\)
> -** vs1r.v\tv2,0\(a1\)
> +** vs1r.v\tv2,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> @@ -66,7 +66,7 @@ spill_4 (float *in, float *out)
> ** vs2r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl2re32.v\tv4,0\(sp\)
> -** vs2r.v\tv4,0\(a1\)
> +** vs2r.v\tv4,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> @@ -90,7 +90,7 @@ spill_5 (float *in, float *out)
> ** vs4r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl4re32.v\tv8,0\(sp\)
> -** vs4r.v\tv8,0\(a1\)
> +** vs4r.v\tv8,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> @@ -114,7 +114,7 @@ spill_6 (float *in, float *out)
> ** vs8r.v\tv[0-9]+,0\(sp\)
> ** ...
> ** vl8re32.v\tv16,0\(sp\)
> -** vs8r.v\tv16,0\(a1\)
> +** vs8r.v\tv16,0\([a-x0-9]+\)
> ** ...
> ** jr\tra
> */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-7.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-7.c
> index e852a75578e..2bc54557dee 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-7.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-7.c
> @@ -7,89 +7,92 @@
>
> /*
> ** spill:
> -** csrr\tt0,vlenb
> -** slli\tt1,t0,4
> -** sub\tsp,sp,t1
> -** vsetvli\ta3,zero,e8,mf8,ta,ma
> -** vle8.v\tv[0-9]+,0\(a0\)
> -** csrr\ta5,vlenb
> -** srli\ta5,a5,3
> -** add\ta5,a5,sp
> -** vse8.v\tv[0-9]+,0\(a5\)
> -** addi\ta5,a0,1
> -** vsetvli\ta4,zero,e8,mf4,ta,ma
> -** vle8.v\tv[0-9]+,0\(a5\)
> -** csrr\ta5,vlenb
> -** srli\ta5,a5,2
> -** add\ta5,a5,sp
> -** vse8.v\tv[0-9]+,0\(a5\)
> -** addi\ta2,a0,2
> -** vsetvli\ta5,zero,e8,mf2,ta,ma
> -** vle8.v\tv[0-9]+,0\(a2\)
> -** csrr\ta2,vlenb
> -** srli\ta2,a2,1
> -** add\ta2,a2,sp
> -** vse8.v\tv[0-9]+,0\(a2\)
> -** addi\ta2,a0,3
> -** vl1re8.v\tv[0-9]+,0\(a2\)
> -** csrr\ta2,vlenb
> -** add\ta2,a2,sp
> -** vs1r.v\tv[0-9]+,0\(a2\)
> -** addi\ta2,a0,4
> -** vl2re8.v\tv[0-9]+,0\(a2\)
> -** csrr\tt3,vlenb
> -** slli\ta2,t3,1
> -** add\ta2,a2,sp
> -** vs2r.v\tv[0-9]+,0\(a2\)
> -** addi\ta2,a0,5
> -** vl4re8.v\tv[0-9]+,0\(a2\)
> -** mv\ta2,t3
> -** slli\tt3,t3,2
> -** add\tt3,t3,sp
> -** vs4r.v\tv[0-9]+,0\(t3\)
> -** addi\ta0,a0,6
> -** vl8re8.v\tv[0-9]+,0\(a0\)
> -** slli\ta0,a2,3
> -** add\ta0,a0,sp
> -** vs8r.v\tv[0-9]+,0\(a0\)
> +** csrr\t[a-x0-9]+,vlenb
> +** slli\t[a-x0-9]+,[a-x0-9]+,4
> +** sub\tsp,[a-x0-9]+,[a-x0-9]+
> +** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
> +** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,3
> +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
> +** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** addi\t[a-x0-9]+,[a-x0-9]+,1
> +** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
> +** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,2
> +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
> +** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** addi\t[a-x0-9]+,[a-x0-9]+,2
> +** vsetvli\t[a-x0-9]+,zero,e8,mf2,ta,ma
> +** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** csrr\t[a-x0-9]+,vlenb
> +** srli\t[a-x0-9]+,[a-x0-9]+,1
> +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
> +** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** addi\t[a-x0-9]+,[a-x0-9]+,3
> +** vl1re8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** csrr\t[a-x0-9]+,vlenb
> +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
> +** vs1r.v\tv[0-9]+,0\([a-x0-9]+\)
> +** addi\t[a-x0-9]+,[a-x0-9]+,4
> +** vl2re8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** csrr\t[a-x0-9]+,vlenb
> +** slli\t[a-x0-9]+,[a-x0-9]+,1
> +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
> +** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
> +** addi\t[a-x0-9]+,[a-x0-9]+,5
> +** vl4re8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** mv\t[a-x0-9]+,[a-x0-9]+
> +** slli\t[a-x0-9]+,[a-x0-9]+,2
> +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
> +** vs4r.v\tv[0-9]+,0\([a-x0-9]+\)
> +** addi\t[a-x0-9]+,[a-x0-9]+,6
> +** vl8re8.v\tv[0-9]+,0\([a-x0-9]+\)
> ** ...
> -** srli\ta0,a2,3
> -** add\ta0,a0,sp
> +** slli\t[a-x0-9]+,[a-x0-9]+,3
> +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
> +** vs8r.v\tv[0-9]+,0\([a-x0-9]+\)
> ** ...
> -** vle8.v\tv[0-9]+,0\(a0\)
> -** vse8.v\tv[0-9]+,0\(a1\)
> -** addi\ta3,a1,1
> -** srli\ta0,a2,2
> -** add\ta0,a0,sp
> +** srli\t[a-x0-9]+,[a-x0-9]+,3
> +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
> ** ...
> -** vle8.v\tv[0-9]+,0\(a0\)
> -** vse8.v\tv[0-9]+,0\(a3\)
> -** addi\ta4,a1,2
> -** srli\ta3,a2,1
> -** add\ta3,a3,sp
> +** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** addi\t[a-x0-9]+,[a-x0-9]+,1
> +** srli\t[a-x0-9]+,[a-x0-9]+,2
> +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
> ** ...
> -** vle8.v\tv[0-9]+,0\(a3\)
> -** vse8.v\tv[0-9]+,0\(a4\)
> -** addi\ta5,a1,3
> -** add\ta4,a2,sp
> -** vl1re8.v\tv[0-9]+,0\(a4\)
> -** vs1r.v\tv[0-9]+,0\(a5\)
> -** addi\ta5,a1,4
> -** slli\ta4,a2,1
> -** add\ta4,a4,sp
> -** vl2re8.v\tv[0-9]+,0\(a4\)
> -** vs2r.v\tv[0-9]+,0\(a5\)
> -** addi\ta5,a1,5
> -** vl4re8.v\tv[0-9]+,0\(t3\)
> -** vs4r.v\tv[0-9]+,0\(a5\)
> -** addi\ta1,a1,6
> -** slli\ta5,a2,3
> -** add\ta5,a5,sp
> -** vl8re8.v\tv[0-9]+,0\(a5\)
> -** vs8r.v\tv[0-9]+,0\(a1\)
> -** csrr\tt0,vlenb
> -** slli\tt1,t0,4
> -** add\tsp,sp,t1
> +** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** addi\t[a-x0-9]+,[a-x0-9]+,2
> +** srli\t[a-x0-9]+,[a-x0-9]+,1
> +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
> +** ...
> +** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** addi\t[a-x0-9]+,[a-x0-9]+,3
> +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
> +** vl1re8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vs1r.v\tv[0-9]+,0\([a-x0-9]+\)
> +** addi\t[a-x0-9]+,[a-x0-9]+,4
> +** slli\t[a-x0-9]+,[a-x0-9]+,1
> +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
> +** vl2re8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
> +** addi\t[a-x0-9]+,[a-x0-9]+,5
> +** slli\t[a-x0-9]+,[a-x0-9]+,2
> +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
> +** vl4re8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vs4r.v\tv[0-9]+,0\([a-x0-9]+\)
> +** addi\t[a-x0-9]+,[a-x0-9]+,6
> +** slli\t[a-x0-9]+,[a-x0-9]+,3
> +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
> +** vl8re8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vs8r.v\tv[0-9]+,0\([a-x0-9]+\)
> +** csrr\t[a-x0-9]+,vlenb
> +** slli\t[a-x0-9]+,[a-x0-9]+,4
> +** add\tsp,[a-x0-9]+,[a-x0-9]+
> ** ...
> ** jr\tra
> */
> --
> 2.36.1
>
@@ -157,7 +157,9 @@ bool check_builtin_call (location_t, vec<location_t>, unsigned int,
tree, unsigned int, tree *);
bool const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
bool legitimize_move (rtx, rtx, machine_mode);
+void emit_vlmax_vsetvl (machine_mode, rtx);
void emit_vlmax_op (unsigned, rtx, rtx, machine_mode);
+void emit_vlmax_op (unsigned, rtx, rtx, rtx, machine_mode);
void emit_nonvlmax_op (unsigned, rtx, rtx, rtx, machine_mode);
enum vlmul_type get_vlmul (machine_mode);
unsigned int get_ratio (machine_mode);
@@ -98,6 +98,15 @@ private:
expand_operand m_ops[MAX_OPERANDS];
};
+static unsigned
+get_sew (machine_mode mode)
+{
+ unsigned int sew = GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
+ ? 8
+ : GET_MODE_BITSIZE (GET_MODE_INNER (mode));
+ return sew;
+}
+
/* Return true if X is a const_vector with all duplicate elements, which is in
the range between MINVAL and MAXVAL. */
bool
@@ -109,13 +118,10 @@ const_vec_all_same_in_range_p (rtx x, HOST_WIDE_INT minval,
&& IN_RANGE (INTVAL (elt), minval, maxval));
}
-static rtx
-emit_vlmax_vsetvl (machine_mode vmode)
+void
+emit_vlmax_vsetvl (machine_mode vmode, rtx vl)
{
- rtx vl = gen_reg_rtx (Pmode);
- unsigned int sew = GET_MODE_CLASS (vmode) == MODE_VECTOR_BOOL
- ? 8
- : GET_MODE_BITSIZE (GET_MODE_INNER (vmode));
+ unsigned int sew = get_sew (vmode);
enum vlmul_type vlmul = get_vlmul (vmode);
unsigned int ratio = calculate_ratio (sew, vlmul);
@@ -125,8 +131,6 @@ emit_vlmax_vsetvl (machine_mode vmode)
const0_rtx));
else
emit_insn (gen_vlmax_avl (Pmode, vl, gen_int_mode (ratio, Pmode)));
-
- return vl;
}
/* Calculate SEW/LMUL ratio. */
@@ -166,7 +170,7 @@ calculate_ratio (unsigned int sew, enum vlmul_type vlmul)
/* Emit an RVV unmask && vl mov from SRC to DEST. */
static void
emit_pred_op (unsigned icode, rtx mask, rtx dest, rtx src, rtx len,
- machine_mode mask_mode)
+ machine_mode mask_mode, bool vlmax_p)
{
insn_expander<8> e;
machine_mode mode = GET_MODE (dest);
@@ -186,17 +190,18 @@ emit_pred_op (unsigned icode, rtx mask, rtx dest, rtx src, rtx len,
e.add_input_operand (len, Pmode);
else
{
- rtx vlmax = emit_vlmax_vsetvl (mode);
+ rtx vlmax = gen_reg_rtx (Pmode);
+ emit_vlmax_vsetvl (mode, vlmax);
e.add_input_operand (vlmax, Pmode);
}
if (GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL)
e.add_policy_operand (get_prefer_tail_policy (), get_prefer_mask_policy ());
- if (len)
- e.add_avl_type_operand (avl_type::NONVLMAX);
- else
+ if (vlmax_p)
e.add_avl_type_operand (avl_type::VLMAX);
+ else
+ e.add_avl_type_operand (avl_type::NONVLMAX);
e.expand ((enum insn_code) icode, MEM_P (dest) || MEM_P (src));
}
@@ -204,14 +209,21 @@ emit_pred_op (unsigned icode, rtx mask, rtx dest, rtx src, rtx len,
void
emit_vlmax_op (unsigned icode, rtx dest, rtx src, machine_mode mask_mode)
{
- emit_pred_op (icode, NULL_RTX, dest, src, NULL_RTX, mask_mode);
+ emit_pred_op (icode, NULL_RTX, dest, src, NULL_RTX, mask_mode, true);
+}
+
+void
+emit_vlmax_op (unsigned icode, rtx dest, rtx src, rtx len,
+ machine_mode mask_mode)
+{
+ emit_pred_op (icode, NULL_RTX, dest, src, len, mask_mode, true);
}
void
emit_nonvlmax_op (unsigned icode, rtx dest, rtx src, rtx len,
machine_mode mask_mode)
{
- emit_pred_op (icode, NULL_RTX, dest, src, len, mask_mode);
+ emit_pred_op (icode, NULL_RTX, dest, src, len, mask_mode, false);
}
static void
@@ -265,6 +277,20 @@ legitimize_move (rtx dest, rtx src, machine_mode mask_mode)
expand_const_vector (dest, src, mask_mode);
return true;
}
+
+  /* In order to decrease the memory traffic, we don't use whole register
+   * load/store for the LMUL less than 1 and mask mode, so those cases will
+   * require one extra general purpose register, but that's not allowed during
+   * the LRA process, so we have a special move pattern used for LRA, which
+   * will defer the expansion after LRA.  */
+ if ((known_lt (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR)
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+ && lra_in_progress)
+ {
+ emit_insn (gen_mov_lra (mode, Pmode, dest, src));
+ return true;
+ }
+
if (known_ge (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR)
&& GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL)
{
@@ -274,6 +300,13 @@ legitimize_move (rtx dest, rtx src, machine_mode mask_mode)
return false;
}
+
+ if (register_operand (src, mode) && register_operand (dest, mode))
+ {
+ emit_insn (gen_rtx_SET (dest, src));
+ return true;
+ }
+
if (!register_operand (src, mode) && !register_operand (dest, mode))
{
rtx tmp = gen_reg_rtx (mode);
@@ -540,9 +573,7 @@ force_vector_length_operand (rtx vl)
static rtx
gen_no_side_effects_vsetvl_rtx (machine_mode vmode, rtx vl, rtx avl)
{
- unsigned int sew = GET_MODE_CLASS (vmode) == MODE_VECTOR_BOOL
- ? 8
- : GET_MODE_BITSIZE (GET_MODE_INNER (vmode));
+ unsigned int sew = get_sew (vmode);
return gen_vsetvl_no_side_effects (Pmode, vl, avl, gen_int_mode (sew, Pmode),
gen_int_mode (get_vlmul (vmode), Pmode),
const0_rtx, const0_rtx);
@@ -635,6 +635,62 @@
[(set_attr "type" "vmov")
(set_attr "mode" "<MODE>")])
+(define_expand "@mov<V_FRACT:mode><P:mode>_lra"
+ [(parallel
+ [(set (match_operand:V_FRACT 0 "reg_or_mem_operand")
+ (match_operand:V_FRACT 1 "reg_or_mem_operand"))
+ (clobber (match_scratch:P 2))])]
+ "TARGET_VECTOR && (lra_in_progress || reload_completed)"
+{})
+
+(define_expand "@mov<VB:mode><P:mode>_lra"
+ [(parallel
+ [(set (match_operand:VB 0 "reg_or_mem_operand")
+ (match_operand:VB 1 "reg_or_mem_operand"))
+ (clobber (match_scratch:P 2))])]
+ "TARGET_VECTOR && (lra_in_progress || reload_completed)"
+{})
+
+(define_insn_and_split "*mov<V_FRACT:mode><P:mode>_lra"
+ [(set (match_operand:V_FRACT 0 "reg_or_mem_operand" "=vr, m,vr")
+ (match_operand:V_FRACT 1 "reg_or_mem_operand" " m,vr,vr"))
+ (clobber (match_scratch:P 2 "=&r,&r,X"))]
+ "TARGET_VECTOR && (lra_in_progress || reload_completed)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ if (REG_P (operands[0]) && REG_P (operands[1]))
+ emit_insn (gen_rtx_SET (operands[0], operands[1]));
+ else
+ {
+ riscv_vector::emit_vlmax_vsetvl (<V_FRACT:MODE>mode, operands[2]);
+ riscv_vector::emit_vlmax_op (code_for_pred_mov (<V_FRACT:MODE>mode),
+ operands[0], operands[1], operands[2], <VM>mode);
+ }
+ DONE;
+})
+
+(define_insn_and_split "*mov<VB:mode><P:mode>_lra"
+ [(set (match_operand:VB 0 "reg_or_mem_operand" "=vr, m,vr")
+ (match_operand:VB 1 "reg_or_mem_operand" " m,vr,vr"))
+ (clobber (match_scratch:P 2 "=&r,&r,X"))]
+ "TARGET_VECTOR && (lra_in_progress || reload_completed)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ if (REG_P (operands[0]) && REG_P (operands[1]))
+ emit_insn (gen_rtx_SET (operands[0], operands[1]));
+ else
+ {
+ riscv_vector::emit_vlmax_vsetvl (<VB:MODE>mode, operands[2]);
+ riscv_vector::emit_vlmax_op (code_for_pred_mov (<VB:MODE>mode),
+ operands[0], operands[1], operands[2], <VB:MODE>mode);
+ }
+ DONE;
+})
+
;; -----------------------------------------------------------------
;; ---- Duplicate Operations
;; -----------------------------------------------------------------
new file mode 100644
@@ -0,0 +1,140 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+#include <iostream>
+#include "riscv_vector.h"
+using std::cerr;
+using std::endl;
+template < class , class b > int c(b val) {
+ return val;
+}
+auto &f32(c< float, uint32_t >);
+template < class d >
+bool check(d , d , size_t );
+int main() {
+ size_t e ;
+ int16_t f[] {};
+ size_t g ;
+ int32_t i[] {4784};
+ size_t aa = 4;
+ int16_t ab[] {2313};
+ int16_t j[] {7114 };
+ int16_t k[] {7696 };
+ uint32_t l[] {9951 };
+ int32_t m[] {2659 };
+ uint16_t n[] {7537 };
+ int32_t o[] {05733}
+ ;
+ uint32_t p[] {7010090 };
+ uint32_t q[] {21060 };
+ uint32_t r[] {2273 };
+ uint32_t s[] {4094366 };
+ int16_t ac[] {11880 };
+ int16_t t[] {10988};
+ int16_t ad[] {30376};
+ int8_t u[] {};
+ int8_t ae[] {7};
+ int8_t v[] {40};
+ int8_t af[] {6};
+ int16_t w[] {4077 };
+ int16_t x[] {7932 };
+ int8_t y[] {3};
+ int8_t z[] {4};
+ uint16_t ag[] {2831};
+ int16_t ah[] {10412 };
+ int16_t ai[] {6823};
+ int32_t aj[] {8572 };
+ int32_t ak[] {9999 };
+ uint32_t al[] {50166962 };
+ uint32_t am[] {9781 };
+ int8_t an[] {9, 35};
+ float ao[] {222.65, 22.79};
+ float ap[] {126.10, 13.92};
+ int64_t aq[] {508727, 5556};
+ int16_t ar[] {2861 };
+ int16_t as[] {21420};
+ int16_t at[] {4706 };
+ uint32_t au ;
+ uint32_t av = 600295662;
+ size_t aw ;
+ int16_t ax = 13015;
+ uint32_t ay ;
+ uint16_t az = 10652;
+ int32_t ba ;
+ int8_t bb ;
+ int64_t bc = 40183771683589512;
+
+asm volatile ("ttt":::"memory");
+ vint16mf4_t bd = __riscv_vle16_v_i16mf4(j, 2);
+ vuint32mf2_t be = __riscv_vle32_v_u32mf2(l, 2);
+ vint32mf2_t bf = __riscv_vle32_v_i32mf2(m, 2);
+ vuint16mf4_t bg = __riscv_vle16_v_u16mf4(n, 2);
+ vint8mf4_t bh ;
+ vuint32m2_t bi = __riscv_vle32_v_u32m2(p, 2);
+ vuint32m2_t bj = __riscv_vle32_v_u32m2(q, 2);
+ vuint32m2_t bk = __riscv_vle32_v_u32m2(r, 2);
+ vuint32m2_t bl = __riscv_vle32_v_u32m2(s, 2);
+ vint16m1_t bm = __riscv_vle16_v_i16m1(ac, 2);
+ vint16m1_t bn = __riscv_vle16_v_i16m1(t, 2);
+ vint8mf2_t bo = __riscv_vle8_v_i8mf2(u, 1);
+ vint8mf2_t bp = __riscv_vle8_v_i8mf2(ae, 1);
+ vint8mf8_t bq = __riscv_vle8_v_i8mf8(af, 1);
+ vint16mf4_t br = __riscv_vle16_v_i16mf4(w, 2);
+ vint16mf4_t bs = __riscv_vle16_v_i16mf4(x, 2);
+ vint8mf8_t bt = __riscv_vle8_v_i8mf8(y, 1);
+ vint8mf8_t bu = __riscv_vle8_v_i8mf8(z, 1);
+ vuint16mf4_t bv = __riscv_vle16_v_u16mf4(ag, 1);
+ vint16mf4_t bw = __riscv_vle16_v_i16mf4(ah, 2);
+ vint16mf4_t bx = __riscv_vle16_v_i16mf4(ai, 2);
+ vint32mf2_t by = __riscv_vle32_v_i32mf2(aj, 2);
+ vint32mf2_t bz = __riscv_vle32_v_i32mf2(ak, 2);
+ vuint32mf2_t ca = __riscv_vle32_v_u32mf2(al, 2);
+ vuint32mf2_t cb = __riscv_vle32_v_u32mf2(am, 2);
+ vint8mf8_t cc = __riscv_vle8_v_i8mf8(an, 2);
+ vfloat32mf2_t cd = __riscv_vle32_v_f32mf2(ao, 2);
+ vfloat32mf2_t ce = __riscv_vle32_v_f32mf2(ap, 2);
+ vint64m1_t cf = __riscv_vle64_v_i64m1(aq, 2);
+ vint16mf4_t cg = __riscv_vle16_v_i16mf4(ar, 2);
+ vint16mf4_t ch = __riscv_vle16_v_i16mf4(as, 2);
+ vint16mf4_t var_62 = __riscv_vle16_v_i16mf4(at, 2);
+ vbool64_t var_20 = __riscv_vmadc_vx_u32mf2_b64(be, ay, 2);
+ int8_t var_17 = __riscv_vmv_x_s_i8mf4_i8(bh);
+ vbool16_t var_28 = __riscv_vmsltu_vv_u32m2_b16(bk, bl, 2);
+ vint8mf2_t var_14 = __riscv_vadd_vv_i8mf2(bo, bp, 1);
+ vbool64_t var_8 = __riscv_vmseq_vv_i16mf4_b64(br, bs, 2);
+ vbool64_t var_42 = __riscv_vmsbc_vx_u16mf4_b64(bv, az, 1);
+ vbool64_t var_46 = __riscv_vmsge_vx_i32mf2_b64(by, ba, 2);
+ vint16mf4_t var_4 = __riscv_vncvt_x_x_w_i16mf4(bz, 2);
+ vbool64_t var_51 = __riscv_vmsgt_vx_i8mf8_b64(cc, bb, 2);
+ vbool64_t var_56 = __riscv_vmfne_vv_f32mf2_b64(cd, ce, 2);
+ vbool64_t var_55 = __riscv_vmseq_vx_i64m1_b64(cf, bc, 2);
+ vuint32m2_t var_16 = __riscv_vslideup_vx_u32m2_mu(var_28, bi, bj, aw, 2);
+ vint8mf2_t var_12 = __riscv_vmulh_vv_i8mf2(var_14, var_14, 1);
+ vint16mf4_t var_0 = __riscv_vdiv_vv_i16mf4_mu(var_8, var_4, ch, var_62, 2);
+ vuint32m2_t var_13 = __riscv_vsub_vx_u32m2(var_16, av, 2);
+ int8_t var_9 = __riscv_vmv_x_s_i8mf2_i8(var_12);
+ vint16mf4_t var_19 = __riscv_vor_vx_i16mf4_mu(var_20, var_0, bd, ax, 2);
+ uint32_t var_10 = __riscv_vmv_x_s_u32m2_u32(var_13);
+ vint8mf8_t var_7 = __riscv_vmadd_vx_i8mf8_mu(var_42, bt, var_9, bu, 1);
+ __riscv_vse16_v_i16mf4(k, var_19, 2);
+ vuint32mf2_t var_3 =
+ __riscv_vslide1down_vx_u32mf2_mu(var_51, ca, cb, var_10, 2);
+ if (check(k, ab, aa))
+ cerr << "check 8 fails" << endl;
+ vbool64_t var_2 = __riscv_vmsne_vx_u32mf2_b64_mu(var_55, var_56, var_3, au, 2);
+ vint16mf4_t var_1 = __riscv_vssub_vv_i16mf4_mu(var_2, var_0, var_4, cg, 2);
+ vint16mf4_t var_5 = __riscv_vxor_vv_i16mf4_mu(var_46, var_1, bw, bx, 2);
+ vint32mf2_t var_18 = __riscv_vwmaccsu_vv_i32mf2(bf, var_1, bg, 2);
+ vint8mf8_t var_6 = __riscv_vncvt_x_x_w_i8mf8_mu(var_8, var_7, var_5, 1);
+ vint16m1_t var_15 = __riscv_vredand_vs_i16mf4_i16m1_tu(bm, var_5, bn, 2);
+ __riscv_vse32_v_i32mf2(o, var_18, 2);
+ vbool64_t var_11 = __riscv_vmsge_vx_i8mf8_b64(var_6, var_17, 1);
+ __riscv_vse16_v_i16m1(ad, var_15, 1);
+ if (check(o, i, g))
+ cerr << "check 1 fails" << endl;
+ __riscv_vse8_v_i8mf8_m(var_11, v, bq, 1);
+ if (check(ad, f, e))
+ cerr << "check 4 fails" << endl;
+ cerr << "check 7 fails" << endl;
+ return 0;
+}
@@ -24,3 +24,4 @@ void f2 (void * in, void *out, int32_t x)
__riscv_vsm_v_b32 (out, m4, 4);
}
+/* { dg-final { scan-assembler-times {vmv} 2 } } */
@@ -24,4 +24,5 @@ void f2 (void * in, void *out, int32_t x)
__riscv_vsm_v_b32 (out, m4, 4);
}
+/* { dg-final { scan-assembler-times {vmv} 2 } } */
@@ -24,4 +24,4 @@ void f2 (void * in, void *out, int32_t x)
__riscv_vsm_v_b32 (out, m4, 4);
}
-
+/* { dg-final { scan-assembler-times {vmv} 2 } } */
new file mode 100644
@@ -0,0 +1,95 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m1_t v1 = __riscv_vle16_v_u16m1 (base1, vl);
+ vuint16m1_t v2 = __riscv_vle16_v_u16m1 (base2, vl);
+ vbool16_t m1 = __riscv_vlm_v_b16 (base3, vl);
+ vbool16_t m2 = __riscv_vlm_v_b16 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28");
+
+ vbool16_t v = __riscv_vmseq_vv_u16m1_b16_mu(m1,m2,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27");
+
+ __riscv_vsm_v_b16 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t shift)
+{
+ vuint16m1_t v1 = __riscv_vle16_v_u16m1 (base1, vl);
+ vbool16_t m1 = __riscv_vlm_v_b16 (base3, vl);
+ vbool16_t m2 = __riscv_vlm_v_b16 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29");
+
+ vbool16_t v = __riscv_vmseq_vx_u16m1_b16_mu(m1,m2,v1,shift,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29");
+
+
+ __riscv_vsm_v_b16 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m1_t v1 = __riscv_vle16_v_u16m1 (base1, vl);
+ vuint16m1_t v2 = __riscv_vle16_v_u16m1 (base2, vl);
+ vbool16_t m1 = __riscv_vlm_v_b16 (base3, vl);
+ vbool16_t m2 = __riscv_vlm_v_b16 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28");
+
+ vbool16_t v = __riscv_vmsltu_vv_u16m1_b16_mu(m1,m2,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27");
+
+ __riscv_vsm_v_b16 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t shift)
+{
+ vuint16m1_t v1 = __riscv_vle16_v_u16m1 (base1, vl);
+ vbool16_t m1 = __riscv_vlm_v_b16 (base3, vl);
+ vbool16_t m2 = __riscv_vlm_v_b16 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29");
+
+ vbool16_t v = __riscv_vmsltu_vx_u16m1_b16_mu(m1,m2,v1,shift,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29");
+
+
+ __riscv_vsm_v_b16 (out,v,vl);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void f (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t shift)
+{
+ vuint16m1_t v1 = __riscv_vle16_v_u16m1 (base1, vl);
+ vbool16_t m1 = __riscv_vlm_v_b16 (base3, vl);
+ vbool16_t m2 = __riscv_vlm_v_b16 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29");
+
+ vbool16_t v = __riscv_vmsltu_vx_u16m1_b16_mu(m1,m2,v1,shift,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+
+ __riscv_vsm_v_b16 (out,v,vl);
+}
+
+/* { dg-final { scan-assembler-times {vmv} 1 } } */
@@ -10,20 +10,20 @@
** csrr\tt0,vlenb
** sub\tsp,sp,t0
** ...
-** csrr\ta2,vlenb
-** srli\ta2,a2,3
-** slli\ta3,a2,3
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vse8.v\tv[0-9]+,0\(a3\)
-** ...
-** csrr\ta2,vlenb
-** srli\ta2,a2,3
-** slli\ta3,a2,3
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vle8.v\tv[0-9]+,0\(a3\)
-** vse8.v\tv[0-9]+,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,3
+** slli\t[a-x0-9]+,[a-x0-9]+,3
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,3
+** slli\t[a-x0-9]+,[a-x0-9]+,3
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
** csrr\tt0,vlenb
** add\tsp,sp,t0
** ...
@@ -43,20 +43,20 @@ spill_1 (int8_t *in, int8_t *out)
** sub\tsp,sp,t0
** vsetvli\ta5,zero,e8,mf4,ta,ma
** vle8.v\tv[0-9]+,0\(a0\)
-** csrr\ta2,vlenb
-** srli\ta2,a2,2
-** slli\ta3,a2,2
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vse8.v\tv[0-9]+,0\(a3\)
-** ...
-** csrr\ta2,vlenb
-** srli\ta2,a2,2
-** slli\ta3,a2,2
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vle8.v\tv[0-9]+,0\(a3\)
-** vse8.v\tv[0-9]+,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
** csrr\tt0,vlenb
** add\tsp,sp,t0
** ...
@@ -76,16 +76,16 @@ spill_2 (int8_t *in, int8_t *out)
** sub\tsp,sp,t0
** vsetvli\ta5,zero,e8,mf2,ta,ma
** vle8.v\tv[0-9]+,0\(a0\)
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vse8.v\tv[0-9]+,0\(a3\)
-** ...
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vle8.v\tv[0-9]+,0\(a3\)
-** vse8.v\tv[0-9]+,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
** csrr\tt0,vlenb
** add\tsp,sp,t0
** ...
@@ -107,7 +107,7 @@ spill_3 (int8_t *in, int8_t *out)
** vs1r.v\tv[0-9]+,0\(sp\)
** ...
** vl1re8.v\tv2,0\(sp\)
-** vs1r.v\tv2,0\(a1\)
+** vs1r.v\tv2,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -131,7 +131,7 @@ spill_4 (int8_t *in, int8_t *out)
** vs2r.v\tv[0-9]+,0\(sp\)
** ...
** vl2re8.v\tv4,0\(sp\)
-** vs2r.v\tv4,0\(a1\)
+** vs2r.v\tv4,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -155,7 +155,7 @@ spill_5 (int8_t *in, int8_t *out)
** vs4r.v\tv[0-9]+,0\(sp\)
** ...
** vl4re8.v\tv8,0\(sp\)
-** vs4r.v\tv8,0\(a1\)
+** vs4r.v\tv8,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -179,7 +179,7 @@ spill_6 (int8_t *in, int8_t *out)
** vs8r.v\tv[0-9]+,0\(sp\)
** ...
** vl8re8.v\tv16,0\(sp\)
-** vs8r.v\tv16,0\(a1\)
+** vs8r.v\tv16,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -200,20 +200,20 @@ spill_7 (int8_t *in, int8_t *out)
** sub\tsp,sp,t0
** vsetvli\ta5,zero,e8,mf8,ta,ma
** vle8.v\tv[0-9]+,0\(a0\)
-** csrr\ta2,vlenb
-** srli\ta2,a2,3
-** slli\ta3,a2,3
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vse8.v\tv[0-9]+,0\(a3\)
-** ...
-** csrr\ta2,vlenb
-** srli\ta2,a2,3
-** slli\ta3,a2,3
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vle8.v\tv[0-9]+,0\(a3\)
-** vse8.v\tv[0-9]+,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,3
+** slli\t[a-x0-9]+,[a-x0-9]+,3
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,3
+** slli\t[a-x0-9]+,[a-x0-9]+,3
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
** csrr\tt0,vlenb
** add\tsp,sp,t0
** ...
@@ -233,20 +233,20 @@ spill_8 (uint8_t *in, uint8_t *out)
** sub\tsp,sp,t0
** vsetvli\ta5,zero,e8,mf4,ta,ma
** vle8.v\tv[0-9]+,0\(a0\)
-** csrr\ta2,vlenb
-** srli\ta2,a2,2
-** slli\ta3,a2,2
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vse8.v\tv[0-9]+,0\(a3\)
-** ...
-** csrr\ta2,vlenb
-** srli\ta2,a2,2
-** slli\ta3,a2,2
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vle8.v\tv[0-9]+,0\(a3\)
-** vse8.v\tv[0-9]+,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
** csrr\tt0,vlenb
** add\tsp,sp,t0
** ...
@@ -266,16 +266,16 @@ spill_9 (uint8_t *in, uint8_t *out)
** sub\tsp,sp,t0
** vsetvli\ta5,zero,e8,mf2,ta,ma
** vle8.v\tv[0-9]+,0\(a0\)
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vse8.v\tv[0-9]+,0\(a3\)
-** ...
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vle8.v\tv[0-9]+,0\(a3\)
-** vse8.v\tv[0-9]+,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
** csrr\tt0,vlenb
** add\tsp,sp,t0
** ...
@@ -297,7 +297,7 @@ spill_10 (uint8_t *in, uint8_t *out)
** vs1r.v\tv[0-9]+,0\(sp\)
** ...
** vl1re8.v\tv2,0\(sp\)
-** vs1r.v\tv2,0\(a1\)
+** vs1r.v\tv2,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -321,7 +321,7 @@ spill_11 (uint8_t *in, uint8_t *out)
** vs2r.v\tv[0-9]+,0\(sp\)
** ...
** vl2re8.v\tv4,0\(sp\)
-** vs2r.v\tv4,0\(a1\)
+** vs2r.v\tv4,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -345,7 +345,7 @@ spill_12 (uint8_t *in, uint8_t *out)
** vs4r.v\tv[0-9]+,0\(sp\)
** ...
** vl4re8.v\tv8,0\(sp\)
-** vs4r.v\tv8,0\(a1\)
+** vs4r.v\tv8,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -369,7 +369,7 @@ spill_13 (uint8_t *in, uint8_t *out)
** vs8r.v\tv[0-9]+,0\(sp\)
** ...
** vl8re8.v\tv16,0\(sp\)
-** vs8r.v\tv16,0\(a1\)
+** vs8r.v\tv16,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -11,20 +11,20 @@
** sub\tsp,sp,t0
** vsetvli\ta5,zero,e16,mf4,ta,ma
** vle16.v\tv[0-9]+,0\(a0\)
-** csrr\ta2,vlenb
-** srli\ta2,a2,2
-** slli\ta3,a2,2
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vse16.v\tv[0-9]+,0\(a3\)
-** ...
-** csrr\ta2,vlenb
-** srli\ta2,a2,2
-** slli\ta3,a2,2
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vle16.v\tv[0-9]+,0\(a3\)
-** vse16.v\tv[0-9]+,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle16.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
** csrr\tt0,vlenb
** add\tsp,sp,t0
** ...
@@ -44,16 +44,16 @@ spill_2 (int16_t *in, int16_t *out)
** sub\tsp,sp,t0
** vsetvli\ta5,zero,e16,mf2,ta,ma
** vle16.v\tv[0-9]+,0\(a0\)
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vse16.v\tv[0-9]+,0\(a3\)
-** ...
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vle16.v\tv[0-9]+,0\(a3\)
-** vse16.v\tv[0-9]+,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle16.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
** csrr\tt0,vlenb
** add\tsp,sp,t0
** ...
@@ -75,7 +75,7 @@ spill_3 (int16_t *in, int16_t *out)
** vs1r.v\tv[0-9]+,0\(sp\)
** ...
** vl1re16.v\tv2,0\(sp\)
-** vs1r.v\tv2,0\(a1\)
+** vs1r.v\tv2,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -99,7 +99,7 @@ spill_4 (int16_t *in, int16_t *out)
** vs2r.v\tv[0-9]+,0\(sp\)
** ...
** vl2re16.v\tv4,0\(sp\)
-** vs2r.v\tv4,0\(a1\)
+** vs2r.v\tv4,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -123,7 +123,7 @@ spill_5 (int16_t *in, int16_t *out)
** vs4r.v\tv[0-9]+,0\(sp\)
** ...
** vl4re16.v\tv8,0\(sp\)
-** vs4r.v\tv8,0\(a1\)
+** vs4r.v\tv8,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -147,7 +147,7 @@ spill_6 (int16_t *in, int16_t *out)
** vs8r.v\tv[0-9]+,0\(sp\)
** ...
** vl8re16.v\tv16,0\(sp\)
-** vs8r.v\tv16,0\(a1\)
+** vs8r.v\tv16,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -168,20 +168,20 @@ spill_7 (int16_t *in, int16_t *out)
** sub\tsp,sp,t0
** vsetvli\ta5,zero,e16,mf4,ta,ma
** vle16.v\tv[0-9]+,0\(a0\)
-** csrr\ta2,vlenb
-** srli\ta2,a2,2
-** slli\ta3,a2,2
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vse16.v\tv[0-9]+,0\(a3\)
-** ...
-** csrr\ta2,vlenb
-** srli\ta2,a2,2
-** slli\ta3,a2,2
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vle16.v\tv[0-9]+,0\(a3\)
-** vse16.v\tv[0-9]+,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle16.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
** csrr\tt0,vlenb
** add\tsp,sp,t0
** ...
@@ -201,16 +201,16 @@ spill_9 (uint16_t *in, uint16_t *out)
** sub\tsp,sp,t0
** vsetvli\ta5,zero,e16,mf2,ta,ma
** vle16.v\tv[0-9]+,0\(a0\)
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vse16.v\tv[0-9]+,0\(a3\)
-** ...
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vle16.v\tv[0-9]+,0\(a3\)
-** vse16.v\tv[0-9]+,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle16.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
** csrr\tt0,vlenb
** add\tsp,sp,t0
** ...
@@ -232,7 +232,7 @@ spill_10 (uint16_t *in, uint16_t *out)
** vs1r.v\tv[0-9]+,0\(sp\)
** ...
** vl1re16.v\tv2,0\(sp\)
-** vs1r.v\tv2,0\(a1\)
+** vs1r.v\tv2,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -256,7 +256,7 @@ spill_11 (uint16_t *in, uint16_t *out)
** vs2r.v\tv[0-9]+,0\(sp\)
** ...
** vl2re16.v\tv4,0\(sp\)
-** vs2r.v\tv4,0\(a1\)
+** vs2r.v\tv4,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -280,7 +280,7 @@ spill_12 (uint16_t *in, uint16_t *out)
** vs4r.v\tv[0-9]+,0\(sp\)
** ...
** vl4re16.v\tv8,0\(sp\)
-** vs4r.v\tv8,0\(a1\)
+** vs4r.v\tv8,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -304,7 +304,7 @@ spill_13 (uint16_t *in, uint16_t *out)
** vs8r.v\tv[0-9]+,0\(sp\)
** ...
** vl8re16.v\tv16,0\(sp\)
-** vs8r.v\tv16,0\(a1\)
+** vs8r.v\tv16,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -11,16 +11,16 @@
** sub\tsp,sp,t0
** vsetvli\ta5,zero,e32,mf2,ta,ma
** vle32.v\tv[0-9]+,0\(a0\)
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vse32.v\tv[0-9]+,0\(a3\)
-** ...
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vle32.v\tv[0-9]+,0\(a3\)
-** vse32.v\tv[0-9]+,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
** csrr\tt0,vlenb
** add\tsp,sp,t0
** ...
@@ -42,7 +42,7 @@ spill_3 (int32_t *in, int32_t *out)
** vs1r.v\tv[0-9]+,0\(sp\)
** ...
** vl1re32.v\tv2,0\(sp\)
-** vs1r.v\tv2,0\(a1\)
+** vs1r.v\tv2,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -66,7 +66,7 @@ spill_4 (int32_t *in, int32_t *out)
** vs2r.v\tv[0-9]+,0\(sp\)
** ...
** vl2re32.v\tv4,0\(sp\)
-** vs2r.v\tv4,0\(a1\)
+** vs2r.v\tv4,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -90,7 +90,7 @@ spill_5 (int32_t *in, int32_t *out)
** vs4r.v\tv[0-9]+,0\(sp\)
** ...
** vl4re32.v\tv8,0\(sp\)
-** vs4r.v\tv8,0\(a1\)
+** vs4r.v\tv8,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -114,7 +114,7 @@ spill_6 (int32_t *in, int32_t *out)
** vs8r.v\tv[0-9]+,0\(sp\)
** ...
** vl8re32.v\tv16,0\(sp\)
-** vs8r.v\tv16,0\(a1\)
+** vs8r.v\tv16,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -135,16 +135,16 @@ spill_7 (int32_t *in, int32_t *out)
** sub\tsp,sp,t0
** vsetvli\ta5,zero,e32,mf2,ta,ma
** vle32.v\tv[0-9]+,0\(a0\)
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vse32.v\tv[0-9]+,0\(a3\)
-** ...
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vle32.v\tv[0-9]+,0\(a3\)
-** vse32.v\tv[0-9]+,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
** csrr\tt0,vlenb
** add\tsp,sp,t0
** ...
@@ -166,7 +166,7 @@ spill_10 (uint32_t *in, uint32_t *out)
** vs1r.v\tv[0-9]+,0\(sp\)
** ...
** vl1re32.v\tv2,0\(sp\)
-** vs1r.v\tv2,0\(a1\)
+** vs1r.v\tv2,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -190,7 +190,7 @@ spill_11 (uint32_t *in, uint32_t *out)
** vs2r.v\tv[0-9]+,0\(sp\)
** ...
** vl2re32.v\tv4,0\(sp\)
-** vs2r.v\tv4,0\(a1\)
+** vs2r.v\tv4,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -214,7 +214,7 @@ spill_12 (uint32_t *in, uint32_t *out)
** vs4r.v\tv[0-9]+,0\(sp\)
** ...
** vl4re32.v\tv8,0\(sp\)
-** vs4r.v\tv8,0\(a1\)
+** vs4r.v\tv8,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -238,7 +238,7 @@ spill_13 (uint32_t *in, uint32_t *out)
** vs8r.v\tv[0-9]+,0\(sp\)
** ...
** vl8re32.v\tv16,0\(sp\)
-** vs8r.v\tv16,0\(a1\)
+** vs8r.v\tv16,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -11,16 +11,16 @@
** sub\tsp,sp,t0
** vsetvli\ta5,zero,e32,mf2,ta,ma
** vle32.v\tv[0-9]+,0\(a0\)
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vse32.v\tv[0-9]+,0\(a3\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
** ...
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vle32.v\tv[0-9]+,0\(a3\)
-** vse32.v\tv[0-9]+,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
** csrr\tt0,vlenb
** add\tsp,sp,t0
** ...
@@ -42,7 +42,7 @@ spill_3 (float *in, float *out)
** vs1r.v\tv[0-9]+,0\(sp\)
** ...
** vl1re32.v\tv2,0\(sp\)
-** vs1r.v\tv2,0\(a1\)
+** vs1r.v\tv2,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -66,7 +66,7 @@ spill_4 (float *in, float *out)
** vs2r.v\tv[0-9]+,0\(sp\)
** ...
** vl2re32.v\tv4,0\(sp\)
-** vs2r.v\tv4,0\(a1\)
+** vs2r.v\tv4,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -90,7 +90,7 @@ spill_5 (float *in, float *out)
** vs4r.v\tv[0-9]+,0\(sp\)
** ...
** vl4re32.v\tv8,0\(sp\)
-** vs4r.v\tv8,0\(a1\)
+** vs4r.v\tv8,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -114,7 +114,7 @@ spill_6 (float *in, float *out)
** vs8r.v\tv[0-9]+,0\(sp\)
** ...
** vl8re32.v\tv16,0\(sp\)
-** vs8r.v\tv16,0\(a1\)
+** vs8r.v\tv16,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -7,89 +7,92 @@
/*
** spill:
-** csrr\tt0,vlenb
-** slli\tt1,t0,4
-** sub\tsp,sp,t1
-** vsetvli\ta3,zero,e8,mf8,ta,ma
-** vle8.v\tv[0-9]+,0\(a0\)
-** csrr\ta5,vlenb
-** srli\ta5,a5,3
-** add\ta5,a5,sp
-** vse8.v\tv[0-9]+,0\(a5\)
-** addi\ta5,a0,1
-** vsetvli\ta4,zero,e8,mf4,ta,ma
-** vle8.v\tv[0-9]+,0\(a5\)
-** csrr\ta5,vlenb
-** srli\ta5,a5,2
-** add\ta5,a5,sp
-** vse8.v\tv[0-9]+,0\(a5\)
-** addi\ta2,a0,2
-** vsetvli\ta5,zero,e8,mf2,ta,ma
-** vle8.v\tv[0-9]+,0\(a2\)
-** csrr\ta2,vlenb
-** srli\ta2,a2,1
-** add\ta2,a2,sp
-** vse8.v\tv[0-9]+,0\(a2\)
-** addi\ta2,a0,3
-** vl1re8.v\tv[0-9]+,0\(a2\)
-** csrr\ta2,vlenb
-** add\ta2,a2,sp
-** vs1r.v\tv[0-9]+,0\(a2\)
-** addi\ta2,a0,4
-** vl2re8.v\tv[0-9]+,0\(a2\)
-** csrr\tt3,vlenb
-** slli\ta2,t3,1
-** add\ta2,a2,sp
-** vs2r.v\tv[0-9]+,0\(a2\)
-** addi\ta2,a0,5
-** vl4re8.v\tv[0-9]+,0\(a2\)
-** mv\ta2,t3
-** slli\tt3,t3,2
-** add\tt3,t3,sp
-** vs4r.v\tv[0-9]+,0\(t3\)
-** addi\ta0,a0,6
-** vl8re8.v\tv[0-9]+,0\(a0\)
-** slli\ta0,a2,3
-** add\ta0,a0,sp
-** vs8r.v\tv[0-9]+,0\(a0\)
+** csrr\t[a-x0-9]+,vlenb
+** slli\t[a-x0-9]+,[a-x0-9]+,4
+** sub\tsp,[a-x0-9]+,[a-x0-9]+
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,3
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,1
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,2
+** vsetvli\t[a-x0-9]+,zero,e8,mf2,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,3
+** vl1re8.v\tv[0-9]+,0\([a-x0-9]+\)
+** csrr\t[a-x0-9]+,vlenb
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vs1r.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,4
+** vl2re8.v\tv[0-9]+,0\([a-x0-9]+\)
+** csrr\t[a-x0-9]+,vlenb
+** slli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,5
+** vl4re8.v\tv[0-9]+,0\([a-x0-9]+\)
+** mv\t[a-x0-9]+,[a-x0-9]+
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vs4r.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,6
+** vl8re8.v\tv[0-9]+,0\([a-x0-9]+\)
** ...
-** srli\ta0,a2,3
-** add\ta0,a0,sp
+** slli\t[a-x0-9]+,[a-x0-9]+,3
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vs8r.v\tv[0-9]+,0\([a-x0-9]+\)
** ...
-** vle8.v\tv[0-9]+,0\(a0\)
-** vse8.v\tv[0-9]+,0\(a1\)
-** addi\ta3,a1,1
-** srli\ta0,a2,2
-** add\ta0,a0,sp
+** srli\t[a-x0-9]+,[a-x0-9]+,3
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
** ...
-** vle8.v\tv[0-9]+,0\(a0\)
-** vse8.v\tv[0-9]+,0\(a3\)
-** addi\ta4,a1,2
-** srli\ta3,a2,1
-** add\ta3,a3,sp
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,1
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
** ...
-** vle8.v\tv[0-9]+,0\(a3\)
-** vse8.v\tv[0-9]+,0\(a4\)
-** addi\ta5,a1,3
-** add\ta4,a2,sp
-** vl1re8.v\tv[0-9]+,0\(a4\)
-** vs1r.v\tv[0-9]+,0\(a5\)
-** addi\ta5,a1,4
-** slli\ta4,a2,1
-** add\ta4,a4,sp
-** vl2re8.v\tv[0-9]+,0\(a4\)
-** vs2r.v\tv[0-9]+,0\(a5\)
-** addi\ta5,a1,5
-** vl4re8.v\tv[0-9]+,0\(t3\)
-** vs4r.v\tv[0-9]+,0\(a5\)
-** addi\ta1,a1,6
-** slli\ta5,a2,3
-** add\ta5,a5,sp
-** vl8re8.v\tv[0-9]+,0\(a5\)
-** vs8r.v\tv[0-9]+,0\(a1\)
-** csrr\tt0,vlenb
-** slli\tt1,t0,4
-** add\tsp,sp,t1
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,2
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** ...
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,3
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vl1re8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vs1r.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,4
+** slli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vl2re8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,5
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vl4re8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vs4r.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,6
+** slli\t[a-x0-9]+,[a-x0-9]+,3
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vl8re8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vs8r.v\tv[0-9]+,0\([a-x0-9]+\)
+** csrr\t[a-x0-9]+,vlenb
+** slli\t[a-x0-9]+,[a-x0-9]+,4
+** add\tsp,[a-x0-9]+,[a-x0-9]+
** ...
** jr\tra
*/