RISC-V: Add RVV scalable register spilling
Commit Message
From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
This patch supports RVV scalable register spilling.
The prologue && epilogue handling picks up a prototype from Monk Chiang <monk.chiang@sifive.com>.
Co-authored-by: Monk Chiang <monk.chiang@sifive.com>
gcc/ChangeLog:
* config/riscv/riscv-v.cc (emit_pred_move): Adjust for scalable register spilling.
(legitimize_move): Ditto.
* config/riscv/riscv.cc (riscv_v_adjust_scalable_frame): New function.
(riscv_first_stack_step): Adjust for scalable register spilling.
(riscv_expand_prologue): Ditto.
(riscv_expand_epilogue): Ditto.
(riscv_dwarf_poly_indeterminate_value): New function.
(TARGET_DWARF_POLY_INDETERMINATE_VALUE): New target hook to support register spilling.
* config/riscv/riscv.h (RISCV_DWARF_VLENB): New macro.
(RISCV_PROLOGUE_TEMP2_REGNUM): Ditto.
(RISCV_PROLOGUE_TEMP2): Ditto.
* config/riscv/vector-iterators.md: New iterators.
* config/riscv/vector.md (*mov<mode>): Fix it for register spilling.
(*mov<mode>_whole): New pattern.
(*mov<mode>_fract): New pattern.
(@pred_mov<mode>): Fix it for register spilling.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/base/mov-9.c: Adjust.
* gcc.target/riscv/rvv/base/macro.h: New test.
* gcc.target/riscv/rvv/base/spill-1.c: New test.
* gcc.target/riscv/rvv/base/spill-10.c: New test.
* gcc.target/riscv/rvv/base/spill-11.c: New test.
* gcc.target/riscv/rvv/base/spill-12.c: New test.
* gcc.target/riscv/rvv/base/spill-2.c: New test.
* gcc.target/riscv/rvv/base/spill-3.c: New test.
* gcc.target/riscv/rvv/base/spill-4.c: New test.
* gcc.target/riscv/rvv/base/spill-5.c: New test.
* gcc.target/riscv/rvv/base/spill-6.c: New test.
* gcc.target/riscv/rvv/base/spill-7.c: New test.
* gcc.target/riscv/rvv/base/spill-8.c: New test.
* gcc.target/riscv/rvv/base/spill-9.c: New test.
---
gcc/config/riscv/riscv-v.cc | 47 +--
gcc/config/riscv/riscv.cc | 147 ++++++-
gcc/config/riscv/riscv.h | 3 +
gcc/config/riscv/vector-iterators.md | 23 ++
gcc/config/riscv/vector.md | 136 +++++--
.../gcc.target/riscv/rvv/base/macro.h | 6 +
.../gcc.target/riscv/rvv/base/mov-9.c | 8 +-
.../gcc.target/riscv/rvv/base/spill-1.c | 385 ++++++++++++++++++
.../gcc.target/riscv/rvv/base/spill-10.c | 41 ++
.../gcc.target/riscv/rvv/base/spill-11.c | 60 +++
.../gcc.target/riscv/rvv/base/spill-12.c | 47 +++
.../gcc.target/riscv/rvv/base/spill-2.c | 320 +++++++++++++++
.../gcc.target/riscv/rvv/base/spill-3.c | 254 ++++++++++++
.../gcc.target/riscv/rvv/base/spill-4.c | 196 +++++++++
.../gcc.target/riscv/rvv/base/spill-5.c | 130 ++++++
.../gcc.target/riscv/rvv/base/spill-6.c | 101 +++++
.../gcc.target/riscv/rvv/base/spill-7.c | 114 ++++++
.../gcc.target/riscv/rvv/base/spill-8.c | 51 +++
.../gcc.target/riscv/rvv/base/spill-9.c | 42 ++
19 files changed, 2021 insertions(+), 90 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/macro.h
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/spill-11.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/spill-12.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/spill-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/spill-6.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/spill-7.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/spill-8.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/spill-9.c
Comments
Committed, thanks !
On Sun, Nov 6, 2022 at 1:57 AM <juzhe.zhong@rivai.ai> wrote:
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index 6615a5c7ffe..e0459e3f610 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -106,28 +106,25 @@ const_vec_all_same_in_range_p (rtx x, HOST_WIDE_INT minval,
>
> /* Emit an RVV unmask && vl mov from SRC to DEST. */
> static void
> -emit_pred_move (rtx dest, rtx src, rtx vl, machine_mode mask_mode)
> +emit_pred_move (rtx dest, rtx src, machine_mode mask_mode)
> {
> insn_expander<7> e;
> -
> machine_mode mode = GET_MODE (dest);
> - if (register_operand (src, mode) && register_operand (dest, mode))
> - {
> - emit_move_insn (dest, src);
> - return;
> - }
> + rtx vl = gen_reg_rtx (Pmode);
> + unsigned int sew = GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
> + ? 8
> + : GET_MODE_BITSIZE (GET_MODE_INNER (mode));
> +
> + emit_insn (gen_vsetvl_no_side_effects (
> + Pmode, vl, gen_rtx_REG (Pmode, 0), gen_int_mode (sew, Pmode),
> + gen_int_mode ((unsigned int) mode, Pmode), const1_rtx, const1_rtx));
>
> e.add_output_operand (dest, mode);
> e.add_all_one_mask_operand (mask_mode);
> - /* For load operation, we create undef operand.
> - For store operation, we make it depend on the dest memory to
> - avoid potential bugs. */
> - if (MEM_P (src))
> - e.add_vundef_operand (mode);
> - else
> - e.add_input_operand (dest, mode);
> + e.add_vundef_operand (mode);
>
> e.add_input_operand (src, mode);
> +
> e.add_input_operand (vl, Pmode);
>
> e.add_policy_operand (TAIL_AGNOSTIC, MASK_AGNOSTIC);
> @@ -143,37 +140,25 @@ bool
> legitimize_move (rtx dest, rtx src, machine_mode mask_mode)
> {
> machine_mode mode = GET_MODE (dest);
> - /* For whole registers load/store or register-register move,
> - we don't need to specially handle them, just let them go
> - through "*mov<mode>" and then use the codegen directly. */
> - if ((known_ge (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR)
> - && (GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL))
> - || (register_operand (src, mode) && register_operand (dest, mode)))
> + if (known_ge (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR)
> + && GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL)
> {
> /* Need to force register if mem <- !reg. */
> if (MEM_P (dest) && !REG_P (src))
> src = force_reg (mode, src);
> +
> return false;
> }
> -
> - rtx vlmax = gen_reg_rtx (Pmode);
> - unsigned int sew = GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
> - ? 8
> - : GET_MODE_BITSIZE (GET_MODE_INNER (mode));
> - emit_insn (gen_vsetvl_no_side_effects (
> - Pmode, vlmax, gen_rtx_REG (Pmode, 0), gen_int_mode (sew, Pmode),
> - gen_int_mode ((unsigned int) mode, Pmode), const1_rtx, const1_rtx));
> -
> if (!register_operand (src, mode) && !register_operand (dest, mode))
> {
> rtx tmp = gen_reg_rtx (mode);
> if (MEM_P (src))
> - emit_pred_move (tmp, src, vlmax, mask_mode);
> + emit_pred_move (tmp, src, mask_mode);
> else
> emit_move_insn (tmp, src);
> src = tmp;
> }
> - emit_pred_move (dest, src, vlmax, mask_mode);
> + emit_pred_move (dest, src, mask_mode);
> return true;
> }
>
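
After this change, whole-register moves simply return false from legitimize_move and keep flowing through "*mov<mode>_whole", while fractional and mask moves always funnel through emit_pred_move, which now emits its own VLMAX vsetvl — e.g. vsetvli a5,zero,e8,mf8,ta,ma followed by vle8.v v24,0(a4) for a VNx1QI load, as in the vector.md pattern comment further down.
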
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index 50ef38438a2..8615df473c1 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -1943,6 +1943,37 @@ riscv_legitimize_poly_move (machine_mode mode, rtx dest, rtx tmp, rtx src)
> }
> }
>
> +/* Adjust the scalable part of the frame for prologue && epilogue. */
> +
> +static void
> +riscv_v_adjust_scalable_frame (rtx target, poly_int64 offset, bool epilogue)
> +{
> + rtx tmp = RISCV_PROLOGUE_TEMP (Pmode);
> + rtx adjust_size = RISCV_PROLOGUE_TEMP2 (Pmode);
> + rtx insn, dwarf, adjust_frame_rtx;
> +
> + riscv_legitimize_poly_move (Pmode, adjust_size, tmp,
> + gen_int_mode (offset, Pmode));
> +
> + if (epilogue)
> + insn = gen_add3_insn (target, target, adjust_size);
> + else
> + insn = gen_sub3_insn (target, target, adjust_size);
> +
> + insn = emit_insn (insn);
> +
> + RTX_FRAME_RELATED_P (insn) = 1;
> +
> + adjust_frame_rtx
> + = gen_rtx_SET (target,
> + plus_constant (Pmode, target, epilogue ? offset : -offset));
> +
> + dwarf = alloc_reg_note (REG_FRAME_RELATED_EXPR, copy_rtx (adjust_frame_rtx),
> + NULL_RTX);
> +
> + REG_NOTES (insn) = dwarf;
> +}
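
To make the arithmetic concrete: a poly_int64 {c0, c1} stands for c0 + c1 * x bytes, where the indeterminate x is (VLENB / riscv_bytes_per_vector_chunk) - 1 (see the DWARF hook below). A minimal C sketch of the evaluation, assuming TARGET_MIN_VLEN > 32 so that one chunk is 8 bytes (the helper name is illustrative, not a GCC API):

    #include <stdint.h>

    /* Runtime byte count of a poly_int64 {c0, c1}, given the vlenb CSR.  */
    static int64_t poly_eval (int64_t c0, int64_t c1, uint64_t vlenb)
    {
      uint64_t x = vlenb / 8 - 1;        /* the polynomial indeterminate */
      return c0 + (int64_t) x * c1;
    }

The scalable part peeled off here is {c1, c1}, i.e. exactly c1 * (VLENB / 8) bytes, which leaves {c0 - c1, 0} as an ordinary constant adjustment; this matches the coeffs[0] - coeffs[1] computation in riscv_first_stack_step below (modulo stack alignment).
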
> +
> /* If (set DEST SRC) is not a valid move instruction, emit an equivalent
> sequence that is valid. */
>
> @@ -4824,21 +4855,29 @@ riscv_restore_reg (rtx reg, rtx mem)
> static HOST_WIDE_INT
> riscv_first_stack_step (struct riscv_frame_info *frame)
> {
> - if (SMALL_OPERAND (frame->total_size.to_constant()))
> - return frame->total_size.to_constant();
> + HOST_WIDE_INT frame_total_constant_size;
> + if (!frame->total_size.is_constant ())
> + frame_total_constant_size
> + = riscv_stack_align (frame->total_size.coeffs[0])
> + - riscv_stack_align (frame->total_size.coeffs[1]);
> + else
> + frame_total_constant_size = frame->total_size.to_constant ();
> +
> + if (SMALL_OPERAND (frame_total_constant_size))
> + return frame_total_constant_size;
>
> HOST_WIDE_INT min_first_step =
> RISCV_STACK_ALIGN ((frame->total_size - frame->fp_sp_offset).to_constant());
> HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8;
> - HOST_WIDE_INT min_second_step = frame->total_size.to_constant() - max_first_step;
> + HOST_WIDE_INT min_second_step = frame_total_constant_size - max_first_step;
> gcc_assert (min_first_step <= max_first_step);
>
> /* As an optimization, use the least-significant bits of the total frame
> size, so that the second adjustment step is just LUI + ADD. */
> if (!SMALL_OPERAND (min_second_step)
> - && frame->total_size.to_constant() % IMM_REACH < IMM_REACH / 2
> - && frame->total_size.to_constant() % IMM_REACH >= min_first_step)
> - return frame->total_size.to_constant() % IMM_REACH;
> + && frame_total_constant_size % IMM_REACH < IMM_REACH / 2
> + && frame_total_constant_size % IMM_REACH >= min_first_step)
> + return frame_total_constant_size % IMM_REACH;
>
> if (TARGET_RVC)
> {
> @@ -4911,12 +4950,12 @@ void
> riscv_expand_prologue (void)
> {
> struct riscv_frame_info *frame = &cfun->machine->frame;
> - HOST_WIDE_INT size = frame->total_size.to_constant ();
> + poly_int64 size = frame->total_size;
> unsigned mask = frame->mask;
> rtx insn;
>
> if (flag_stack_usage_info)
> - current_function_static_stack_size = size;
> + current_function_static_stack_size = constant_lower_bound (size);
>
> if (cfun->machine->naked_p)
> return;
> @@ -4938,7 +4977,9 @@ riscv_expand_prologue (void)
> /* Save the registers. */
> if ((frame->mask | frame->fmask) != 0)
> {
> - HOST_WIDE_INT step1 = MIN (size, riscv_first_stack_step (frame));
> + HOST_WIDE_INT step1 = riscv_first_stack_step (frame);
> + if (size.is_constant ())
> + step1 = MIN (size.to_constant(), step1);
>
> insn = gen_add3_insn (stack_pointer_rtx,
> stack_pointer_rtx,
> @@ -4961,23 +5002,40 @@ riscv_expand_prologue (void)
> }
>
> /* Allocate the rest of the frame. */
> - if (size > 0)
> + if (known_gt (size, 0))
> {
> - if (SMALL_OPERAND (-size))
> + /* Two-step adjustment:
> + 1. scalable frame. 2. constant frame. */
> + poly_int64 scalable_frame (0, 0);
> + if (!size.is_constant ())
> + {
> + /* First for scalable frame. */
> + scalable_frame = size;
> + scalable_frame.coeffs[0] = size.coeffs[1];
> + riscv_v_adjust_scalable_frame (stack_pointer_rtx, scalable_frame, false);
> + size -= scalable_frame;
> + }
> +
> + /* Second step for constant frame. */
> + HOST_WIDE_INT constant_frame = size.to_constant ();
> + if (constant_frame == 0)
> + return;
> +
> + if (SMALL_OPERAND (-constant_frame))
> {
> insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
> - GEN_INT (-size));
> + GEN_INT (-constant_frame));
> RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
> }
> else
> {
> - riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), GEN_INT (-size));
> + riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), GEN_INT (-constant_frame));
> emit_insn (gen_add3_insn (stack_pointer_rtx,
> stack_pointer_rtx,
> RISCV_PROLOGUE_TEMP (Pmode)));
>
> /* Describe the effect of the previous instructions. */
> - insn = plus_constant (Pmode, stack_pointer_rtx, -size);
> + insn = plus_constant (Pmode, stack_pointer_rtx, -constant_frame);
> insn = gen_rtx_SET (stack_pointer_rtx, insn);
> riscv_set_frame_expr (insn);
> }
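
As an illustrative walk-through (numbers invented for the example, TARGET_MIN_VLEN > 32): a total_size of {48, 16} splits into the scalable part {16, 16} = 16 * (VLENB / 8) = 2 * VLENB bytes, materialized through csrr/slli/sub on the stack pointer by riscv_v_adjust_scalable_frame, plus a constant remainder of 48 - 16 = 32 bytes handled by a plain addi sp,sp,-32.
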
> @@ -5020,7 +5078,7 @@ riscv_expand_epilogue (int style)
> Start off by assuming that no registers need to be restored. */
> struct riscv_frame_info *frame = &cfun->machine->frame;
> unsigned mask = frame->mask;
> - HOST_WIDE_INT step1 = frame->total_size.to_constant ();
> + poly_int64 step1 = frame->total_size;
> HOST_WIDE_INT step2 = 0;
> bool use_restore_libcall = ((style == NORMAL_RETURN)
> && riscv_use_save_libcall (frame));
> @@ -5056,11 +5114,27 @@ riscv_expand_epilogue (int style)
> riscv_emit_stack_tie ();
> need_barrier_p = false;
>
> - rtx adjust = GEN_INT (-frame->hard_frame_pointer_offset.to_constant ());
> - if (!SMALL_OPERAND (INTVAL (adjust)))
> + poly_int64 adjust_offset = -frame->hard_frame_pointer_offset;
> + rtx adjust = NULL_RTX;
> +
> + if (!adjust_offset.is_constant ())
> {
> - riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), adjust);
> - adjust = RISCV_PROLOGUE_TEMP (Pmode);
> + rtx tmp1 = RISCV_PROLOGUE_TEMP (Pmode);
> + rtx tmp2 = RISCV_PROLOGUE_TEMP2 (Pmode);
> + riscv_legitimize_poly_move (Pmode, tmp1, tmp2,
> + gen_int_mode (adjust_offset, Pmode));
> + adjust = tmp1;
> + }
> + else
> + {
> + if (!SMALL_OPERAND (adjust_offset.to_constant ()))
> + {
> + riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode),
> + GEN_INT (adjust_offset.to_constant ()));
> + adjust = RISCV_PROLOGUE_TEMP (Pmode);
> + }
> + else
> + adjust = GEN_INT (adjust_offset.to_constant ());
> }
>
> insn = emit_insn (
> @@ -5070,7 +5144,7 @@ riscv_expand_epilogue (int style)
> rtx dwarf = NULL_RTX;
> rtx cfa_adjust_value = gen_rtx_PLUS (
> Pmode, hard_frame_pointer_rtx,
> - GEN_INT (-frame->hard_frame_pointer_offset.to_constant ()));
> + gen_int_mode (-frame->hard_frame_pointer_offset, Pmode));
> rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value);
> dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf);
> RTX_FRAME_RELATED_P (insn) = 1;
> @@ -5092,10 +5166,20 @@ riscv_expand_epilogue (int style)
> /* Emit a barrier to prevent loads from a deallocated stack. */
> riscv_emit_stack_tie ();
> need_barrier_p = false;
> +
> + /* Restore the scalable frame which was allocated in the prologue. */
> + if (!step1.is_constant ())
> + {
> + poly_int64 scalable_frame = step1;
> + scalable_frame.coeffs[0] = step1.coeffs[1];
> + riscv_v_adjust_scalable_frame (stack_pointer_rtx, scalable_frame,
> + true);
> + step1 -= scalable_frame;
> + }
>
> /* Get an rtx for STEP1 that we can add to BASE. */
> - rtx adjust = GEN_INT (step1);
> - if (!SMALL_OPERAND (step1))
> + rtx adjust = GEN_INT (step1.to_constant ());
> + if (!SMALL_OPERAND (step1.to_constant ()))
> {
> riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), adjust);
> adjust = RISCV_PROLOGUE_TEMP (Pmode);
> @@ -6463,6 +6547,22 @@ riscv_regmode_natural_size (machine_mode mode)
> return UNITS_PER_WORD;
> }
>
> +/* Implement the TARGET_DWARF_POLY_INDETERMINATE_VALUE hook. */
> +
> +static unsigned int
> +riscv_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor,
> + int *offset)
> +{
> + /* Polynomial invariant 1 == (VLENB / riscv_bytes_per_vector_chunk) - 1.
> + 1. TARGET_MIN_VLEN == 32, polynomial invariant 1 == (VLENB / 4) - 1.
> + 2. TARGET_MIN_VLEN > 32, polynomial invariant 1 == (VLENB / 8) - 1.
> + */
> + gcc_assert (i == 1);
> + *factor = riscv_bytes_per_vector_chunk;
> + *offset = 1;
> + return RISCV_DWARF_VLENB;
> +}
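
For example, at VLEN = 128 (vlenb = 16) with TARGET_MIN_VLEN > 32, the indeterminate evaluates to 16 / 8 - 1 = 1. A hedged C sketch of the consumer side of this hook's contract (register value divided by factor, minus offset; the helper is hypothetical, not part of GCC or any debugger):

    #include <stdint.h>

    /* Recover polynomial indeterminate 1 from the vlenb DWARF register
       (RISCV_DWARF_VLENB), using the factor/offset the hook returns.  */
    static uint64_t rvv_indeterminate (uint64_t vlenb, unsigned factor,
                                       int offset)
    {
      return vlenb / factor - offset;   /* VLEN=128: 16 / 8 - 1 == 1 */
    }
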
> +
> /* Initialize the GCC target structure. */
> #undef TARGET_ASM_ALIGNED_HI_OP
> #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
> @@ -6684,6 +6784,9 @@ riscv_regmode_natural_size (machine_mode mode)
> #undef TARGET_VECTOR_ALIGNMENT
> #define TARGET_VECTOR_ALIGNMENT riscv_vector_alignment
>
> +#undef TARGET_DWARF_POLY_INDETERMINATE_VALUE
> +#define TARGET_DWARF_POLY_INDETERMINATE_VALUE riscv_dwarf_poly_indeterminate_value
> +
> struct gcc_target targetm = TARGET_INITIALIZER;
>
> #include "gt-riscv.h"
> diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
> index 1385f0a16dc..2d0d170645c 100644
> --- a/gcc/config/riscv/riscv.h
> +++ b/gcc/config/riscv/riscv.h
> @@ -392,6 +392,7 @@ ASM_MISA_SPEC
> /* Define Dwarf for RVV. */
> #define RISCV_DWARF_VL (4096 + 0xc20)
> #define RISCV_DWARF_VTYPE (4096 + 0xc21)
> +#define RISCV_DWARF_VLENB (4096 + 0xc22)
>
> /* Register in which static-chain is passed to a function. */
> #define STATIC_CHAIN_REGNUM (GP_TEMP_FIRST + 2)
> @@ -405,6 +406,8 @@ ASM_MISA_SPEC
>
> #define RISCV_PROLOGUE_TEMP_REGNUM (GP_TEMP_FIRST)
> #define RISCV_PROLOGUE_TEMP(MODE) gen_rtx_REG (MODE, RISCV_PROLOGUE_TEMP_REGNUM)
> +#define RISCV_PROLOGUE_TEMP2_REGNUM (GP_TEMP_FIRST + 1)
> +#define RISCV_PROLOGUE_TEMP2(MODE) gen_rtx_REG (MODE, RISCV_PROLOGUE_TEMP2_REGNUM)
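
Note that RISCV_PROLOGUE_TEMP2_REGNUM reuses GP_TEMP_FIRST + 1, the same register as RISCV_CALL_ADDRESS_TEMP just below; both uses are confined to prologue/epilogue or call sequences, so they presumably never overlap.
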
>
> #define RISCV_CALL_ADDRESS_TEMP_REGNUM (GP_TEMP_FIRST + 1)
> #define RISCV_CALL_ADDRESS_TEMP(MODE) \
> diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
> index 1255e33a6f8..bf3611f2eda 100644
> --- a/gcc/config/riscv/vector-iterators.md
> +++ b/gcc/config/riscv/vector-iterators.md
> @@ -34,6 +34,29 @@
> (VNx8DF "TARGET_VECTOR_ELEN_FP_64")
> ])
>
> +(define_mode_iterator V_WHOLE [
> + (VNx4QI "TARGET_MIN_VLEN == 32") VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32")
> + (VNx2HI "TARGET_MIN_VLEN == 32") VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32")
> + (VNx1SI "TARGET_MIN_VLEN == 32") VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32")
> + VNx1DI VNx2DI VNx4DI (VNx8DI "TARGET_MIN_VLEN > 32")
> + (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN == 32")
> + (VNx2SF "TARGET_VECTOR_ELEN_FP_32")
> + (VNx4SF "TARGET_VECTOR_ELEN_FP_32")
> + (VNx8SF "TARGET_VECTOR_ELEN_FP_32")
> + (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
> + (VNx1DF "TARGET_VECTOR_ELEN_FP_64")
> + (VNx2DF "TARGET_VECTOR_ELEN_FP_64")
> + (VNx4DF "TARGET_VECTOR_ELEN_FP_64")
> + (VNx8DF "TARGET_VECTOR_ELEN_FP_64")
> +])
> +
> +(define_mode_iterator V_FRACT [
> + VNx1QI VNx2QI (VNx4QI "TARGET_MIN_VLEN > 32")
> + VNx1HI (VNx2HI "TARGET_MIN_VLEN > 32")
> + (VNx1SI "TARGET_MIN_VLEN > 32")
> + (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
> +])
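
The classification tracks the chunk size: with TARGET_MIN_VLEN > 32 a chunk is 8 bytes, so e.g. VNx8QI fills a whole vector register and stays in V_WHOLE, while VNx4QI only fills half a register (mf2) and therefore lands in V_FRACT; with TARGET_MIN_VLEN == 32 the chunk is 4 bytes and the same VNx4QI is a whole-register mode, which is why several entries carry both conditions.
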
> +
> (define_mode_iterator VB [
> VNx1BI VNx2BI VNx4BI VNx8BI VNx16BI VNx32BI
> (VNx64BI "TARGET_MIN_VLEN > 32")
> diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
> index 19bb27560f8..8eb4ca63448 100644
> --- a/gcc/config/riscv/vector.md
> +++ b/gcc/config/riscv/vector.md
> @@ -53,6 +53,36 @@
> (match_operand:V 1 "vector_move_operand"))]
> "TARGET_VECTOR"
> {
> + /* For a whole register move, we transform the pattern into a format
> + that excludes the clobber of a scratch register.
> +
> + We include the clobber of a scalar scratch register that is going to be
> + used to emit a vsetvl instruction after reload_completed, since we
> + need the vsetvl instruction to set the VL/VTYPE global status for
> + fractional vector load/store.
> +
> + For example:
> + [(set (match_operand:VNx1QI v24)
> + (match_operand:VNx1QI (mem: a4)))
> + (clobber (scratch:SI a5))]
> + ====>> vsetvl a5,zero,e8,mf8
> + ====>> vle8.v v24,(a4)
> +
> + Philosophy:
> +
> + - Clobber a scalar scratch register for each mov<mode>.
> +
> + - Classify the machine_mode mode = <MODE>mode into 2 classes:
> + whole register move and fractional register move.
> +
> + - Transform the pattern and remove the scratch clobber for whole
> + register moves so that we can avoid occupying the scalar
> + registers.
> +
> + - We cannot leave it to TARGET_SECONDARY_RELOAD since it happens
> + before spilling: the clobber scratch is used when spilling fractional
> + registers in IRA/LRA, so that is too early. */
> +
> if (riscv_vector::legitimize_move (operands[0], operands[1], <VM>mode))
> DONE;
> })
> @@ -61,12 +91,34 @@
> ;; Also applicable for all register moves.
> ;; Fractional vector modes load/store are not allowed to match this pattern.
> ;; Mask modes load/store are not allowed to match this pattern.
> -(define_insn "*mov<mode>"
> - [(set (match_operand:V 0 "reg_or_mem_operand" "=vr,m,vr")
> - (match_operand:V 1 "reg_or_mem_operand" "m,vr,vr"))]
> - "TARGET_VECTOR && ((register_operand (operands[0], <MODE>mode)
> - && register_operand (operands[1], <MODE>mode))
> - || known_ge (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR))"
> +;; We separate "*mov<mode>" into "*mov<mode>_whole" and "*mov<mode>_fract" because
> +;; we don't want to include fractional load/store in "*mov<mode>", which would
> +;; create unexpected patterns in LRA.
> +;; For example:
> +;; ira rtl:
> +;; (insn 20 19 9 2 (set (reg/v:VNx2QI 97 v1 [ v1 ])
> +;; (reg:VNx2QI 134 [ _1 ])) "rvv.c":9:22 571 {*movvnx2qi_fract}
> +;; (nil))
> +;; When the value of pseudo register 134 in the insn above is discovered to be
> +;; already spilled to memory during LRA,
> +;; LRA will reload this pattern into a memory load instruction pattern.
> +;; Because VNx2QI is a fractional vector, we want LRA to reload this pattern into
> +;; (insn 20 19 9 2 (parallel [
> +;; (set (reg:VNx2QI 98 v2 [orig:134 _1 ] [134])
> +;; (mem/c:VNx2QI (reg:SI 13 a3 [155]) [1 %sfp+[-2, -2] S[2, 2] A8]))
> +;; (clobber (reg:SI 14 a4 [149]))])
> +;; so that we are able to emit a vsetvl instruction using the clobber scratch a4.
> +;; To let LRA generate the expected pattern, we should exclude fractional vector
> +;; load/store from "*mov<mode>_whole". Otherwise, it will reload this pattern into:
> +;; (insn 20 19 9 2 (set (reg:VNx2QI 98 v2 [orig:134 _1 ] [134])
> +;; (mem/c:VNx2QI (reg:SI 13 a3 [155]) [1 %sfp+[-2, -2] S[2, 2] A8])))
> +;; which is not the pattern we want.
> +;; According to the facts above, "*mov<mode>_whole" includes load/store/move for whole
> +;; vector modes according to '-march', and "*mov<mode>_fract" only includes fractional
> +;; vector modes.
> +(define_insn "*mov<mode>_whole"
> + [(set (match_operand:V_WHOLE 0 "reg_or_mem_operand" "=vr, m,vr")
> + (match_operand:V_WHOLE 1 "reg_or_mem_operand" " m,vr,vr"))]
> + "TARGET_VECTOR"
> "@
> vl%m1re<sew>.v\t%0,%1
> vs%m1r.v\t%1,%0
> @@ -74,18 +126,26 @@
> [(set_attr "type" "vldr,vstr,vmov")
> (set_attr "mode" "<MODE>")])
>
> +(define_insn "*mov<mode>_fract"
> + [(set (match_operand:V_FRACT 0 "register_operand" "=vr")
> + (match_operand:V_FRACT 1 "register_operand" " vr"))]
> + "TARGET_VECTOR"
> + "vmv1r.v\t%0,%1"
> + [(set_attr "type" "vmov")
> + (set_attr "mode" "<MODE>")])
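
A fractional value only occupies the low part of one vector register, so a plain whole-register vmv1r.v is a safe way to copy it without touching VL/VTYPE; only the memory forms of the fractional modes need the vsetvl handling described above.
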
> +
> (define_expand "mov<mode>"
> [(set (match_operand:VB 0 "reg_or_mem_operand")
> (match_operand:VB 1 "vector_move_operand"))]
> "TARGET_VECTOR"
> {
> if (riscv_vector::legitimize_move (operands[0], operands[1], <MODE>mode))
> - DONE;
> + DONE;
> })
>
> (define_insn "*mov<mode>"
> [(set (match_operand:VB 0 "register_operand" "=vr")
> - (match_operand:VB 1 "register_operand" "vr"))]
> + (match_operand:VB 1 "register_operand" " vr"))]
> "TARGET_VECTOR"
> "vmv1r.v\t%0,%1"
> [(set_attr "type" "vmov")
> @@ -290,18 +350,18 @@
> ;; (const_int:QI N)]), -15 <= N < 16.
> ;; 2. (const_vector:VNx1SF repeat [
> ;; (const_double:SF 0.0 [0x0.0p+0])]).
> -(define_insn "@pred_mov<mode>"
> +(define_insn_and_split "@pred_mov<mode>"
> [(set (match_operand:V 0 "nonimmediate_operand" "=vd, vr, m, vr, vr")
> - (if_then_else:V
> - (unspec:<VM>
> - [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1, vmWc1, vmWc1, Wc1")
> - (match_operand 4 "vector_length_operand" " rK, rK, rK, rK, rK")
> - (match_operand 5 "const_int_operand" " i, i, i, i, i")
> - (match_operand 6 "const_int_operand" " i, i, i, i, i")
> - (reg:SI VL_REGNUM)
> - (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
> - (match_operand:V 3 "vector_move_operand" " m, m, vr, vr, viWc0")
> - (match_operand:V 2 "vector_merge_operand" " 0, vu, 0, vu0, vu0")))]
> + (if_then_else:V
> + (unspec:<VM>
> + [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1, vmWc1, Wc1, Wc1")
> + (match_operand 4 "vector_length_operand" " rK, rK, rK, rK, rK")
> + (match_operand 5 "const_int_operand" " i, i, i, i, i")
> + (match_operand 6 "const_int_operand" " i, i, i, i, i")
> + (reg:SI VL_REGNUM)
> + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
> + (match_operand:V 3 "vector_move_operand" " m, m, vr, vr, viWc0")
> + (match_operand:V 2 "vector_merge_operand" " 0, vu, vu0, vu0, vu0")))]
> "TARGET_VECTOR"
> "@
> vle<sew>.v\t%0,%3%p1
> @@ -309,31 +369,41 @@
> vse<sew>.v\t%3,%0%p1
> vmv.v.v\t%0,%3
> vmv.v.i\t%0,v%3"
> + "&& register_operand (operands[0], <MODE>mode)
> + && register_operand (operands[3], <MODE>mode)
> + && satisfies_constraint_vu (operands[2])"
> + [(set (match_dup 0) (match_dup 3))]
> + ""
> [(set_attr "type" "vlde,vlde,vste,vimov,vimov")
> (set_attr "mode" "<MODE>")])
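
The new split condition demotes a predicated register-to-register vmv.v.v whose merge operand is undisturbed (vu) into a plain (set dest src), so it can match the simple move patterns above instead of keeping the heavier predicated form.
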
>
> ;; vlm.v/vsm.v/vmclr.m/vmset.m.
> ;; constraint alternative 0 match vlm.v.
> -;; constraint alternative 2 match vsm.v.
> +;; constraint alternative 1 match vsm.v.
> ;; constraint alternative 3 match vmclr.m.
> ;; constraint alternative 4 match vmset.m.
> -(define_insn "@pred_mov<mode>"
> - [(set (match_operand:VB 0 "nonimmediate_operand" "=vr, m, vr, vr")
> - (if_then_else:VB
> - (unspec:VB
> - [(match_operand:VB 1 "vector_mask_operand" "Wc1, Wc1, Wc1, Wc1")
> - (match_operand 4 "vector_length_operand" " rK, rK, rK, rK")
> - (match_operand 5 "const_int_operand" " i, i, i, i")
> - (match_operand 6 "const_int_operand" " i, i, i, i")
> - (reg:SI VL_REGNUM)
> - (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
> - (match_operand:VB 3 "vector_move_operand" " m, vr, Wc0, Wc1")
> - (match_operand:VB 2 "vector_merge_operand" " vu, 0, vu, vu")))]
> +(define_insn_and_split "@pred_mov<mode>"
> + [(set (match_operand:VB 0 "nonimmediate_operand" "=vr, m, vr, vr, vr")
> + (if_then_else:VB
> + (unspec:VB
> + [(match_operand:VB 1 "vector_mask_operand" "Wc1, Wc1, Wc1, Wc1, Wc1")
> + (match_operand 4 "vector_length_operand" " rK, rK, rK, rK, rK")
> + (match_operand 5 "const_int_operand" " i, i, i, i, i")
> + (match_operand 6 "const_int_operand" " i, i, i, i, i")
> + (reg:SI VL_REGNUM)
> + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
> + (match_operand:VB 3 "vector_move_operand" " m, vr, vr, Wc0, Wc1")
> + (match_operand:VB 2 "vector_merge_operand" " vu, vu0, vu, vu, vu")))]
> "TARGET_VECTOR"
> "@
> vlm.v\t%0,%3
> vsm.v\t%3,%0
> + #
> vmclr.m\t%0
> vmset.m\t%0"
> - [(set_attr "type" "vldm,vstm,vmalu,vmalu")
> + "&& register_operand (operands[0], <MODE>mode)
> + && register_operand (operands[3], <MODE>mode)"
> + [(set (match_dup 0) (match_dup 3))]
> + ""
> + [(set_attr "type" "vldm,vstm,vimov,vmalu,vmalu")
> (set_attr "mode" "<MODE>")])
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/macro.h b/gcc/testsuite/gcc.target/riscv/rvv/base/macro.h
> new file mode 100644
> index 00000000000..a032ac38f5a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/macro.h
> @@ -0,0 +1,6 @@
> +#define exhaust_vector_regs() \
> + asm volatile("#" :: \
> + : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
> + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", \
> + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", \
> + "v26", "v27", "v28", "v29", "v30", "v31");
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-9.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-9.c
> index 7ed10bc5833..ae672824685 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-9.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-9.c
> @@ -1,5 +1,5 @@
> /* { dg-do compile } */
> -/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3 -fno-schedule-insns -fno-schedule-insns2 " } */
> /* { dg-final { check-function-bodies "**" "" } } */
>
> #include <riscv_vector.h>
> @@ -7,12 +7,12 @@
> /* Test tieable of RVV types with same LMUL. */
> /*
> ** mov1:
> -** addi\t(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),1
> ** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au]
> -** addi\t(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),2
> ** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
> ** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
> +** addi\t(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),1
> ** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
> +** addi\t(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),2
> ** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
> ** ret
> */
> @@ -28,10 +28,10 @@ void mov1 (int8_t *in, int8_t *out, int M)
>
> /*
> ** mov2:
> -** addi\t(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),1
> ** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au]
> ** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
> ** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
> +** addi\t(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),1
> ** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
> ** ret
> */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c
> new file mode 100644
> index 00000000000..b1220c48f1b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c
> @@ -0,0 +1,385 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include "riscv_vector.h"
> +#include "macro.h"
> +
> +/*
> +** spill_1:
> +** csrr\tt0,vlenb
> +** sub\tsp,sp,t0
> +** ...
> +** csrr\ta2,vlenb
> +** srli\ta2,a2,3
> +** slli\ta3,a2,3
> +** sub\ta3,a3,a2
> +** add\ta3,a3,sp
> +** vse8.v\tv24,0\(a3\)
> +** ...
> +** csrr\ta2,vlenb
> +** srli\ta2,a2,3
> +** slli\ta3,a2,3
> +** sub\ta3,a3,a2
> +** add\ta3,a3,sp
> +** vle8.v\tv24,0\(a3\)
> +** vse8.v\tv24,0\(a1\)
> +** csrr\tt0,vlenb
> +** add\tsp,sp,t0
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_1 (int8_t *in, int8_t *out)
> +{
> + vint8mf8_t v1 = *(vint8mf8_t*)in;
> + exhaust_vector_regs ();
> + *(vint8mf8_t*)out = v1;
> +}
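
Decoding the checked mf8 sequence: the scalable frame is one VLENB; the slot offset is built as a2 = vlenb >> 3, a3 = (a2 << 3) - a2 + sp, i.e. vlenb - vlenb/8, so the eighth-of-a-register value is spilled at the top of the frame. The mf4 case below is analogous with shift 2, and mf2 simplifies to a single srli since vlenb - vlenb/2 == vlenb >> 1.
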
> +
> +/*
> +** spill_2:
> +** csrr\tt0,vlenb
> +** sub\tsp,sp,t0
> +** vsetvli\ta5,zero,e8,mf4,ta,ma
> +** vle8.v\tv24,0\(a0\)
> +** csrr\ta2,vlenb
> +** srli\ta2,a2,2
> +** slli\ta3,a2,2
> +** sub\ta3,a3,a2
> +** add\ta3,a3,sp
> +** vse8.v\tv24,0\(a3\)
> +** ...
> +** csrr\ta2,vlenb
> +** srli\ta2,a2,2
> +** slli\ta3,a2,2
> +** sub\ta3,a3,a2
> +** add\ta3,a3,sp
> +** vle8.v\tv24,0\(a3\)
> +** vse8.v\tv24,0\(a1\)
> +** csrr\tt0,vlenb
> +** add\tsp,sp,t0
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_2 (int8_t *in, int8_t *out)
> +{
> + vint8mf4_t v1 = *(vint8mf4_t*)in;
> + exhaust_vector_regs ();
> + *(vint8mf4_t*)out = v1;
> +}
> +
> +/*
> +** spill_3:
> +** csrr\tt0,vlenb
> +** sub\tsp,sp,t0
> +** vsetvli\ta5,zero,e8,mf2,ta,ma
> +** vle8.v\tv24,0\(a0\)
> +** csrr\ta3,vlenb
> +** srli\ta3,a3,1
> +** add\ta3,a3,sp
> +** vse8.v\tv24,0\(a3\)
> +** ...
> +** csrr\ta3,vlenb
> +** srli\ta3,a3,1
> +** add\ta3,a3,sp
> +** vle8.v\tv24,0\(a3\)
> +** vse8.v\tv24,0\(a1\)
> +** csrr\tt0,vlenb
> +** add\tsp,sp,t0
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_3 (int8_t *in, int8_t *out)
> +{
> + vint8mf2_t v1 = *(vint8mf2_t*)in;
> + exhaust_vector_regs ();
> + *(vint8mf2_t*)out = v1;
> +}
> +
> +/*
> +** spill_4:
> +** csrr\tt0,vlenb
> +** sub\tsp,sp,t0
> +** ...
> +** vs1r.v\tv24,0\(sp\)
> +** ...
> +** vl1re8.v\tv2,0\(sp\)
> +** vs1r.v\tv2,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_4 (int8_t *in, int8_t *out)
> +{
> + register vint8m1_t v1 asm("v1") = *(vint8m1_t*)in;
> + asm volatile ("# %0"::"vr"(v1));
> + exhaust_vector_regs ();
> + register vint8m1_t v2 asm("v2") = v1;
> + *(vint8m1_t*)out = v2;
> + asm volatile ("# %0"::"vr"(v2));
> +}
> +
> +/*
> +** spill_5:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,1
> +** sub\tsp,sp,t1
> +** ...
> +** vs2r.v\tv24,0\(sp\)
> +** ...
> +** vl2re8.v\tv4,0\(sp\)
> +** vs2r.v\tv4,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_5 (int8_t *in, int8_t *out)
> +{
> + register vint8m2_t v2 asm("v2") = *(vint8m2_t*)in;
> + asm volatile ("# %0"::"vr"(v2));
> + exhaust_vector_regs ();
> + register vint8m2_t v4 asm("v4") = v2;
> + *(vint8m2_t*)out = v4;
> + asm volatile ("# %0"::"vr"(v4));
> +}
> +
> +/*
> +** spill_6:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,2
> +** sub\tsp,sp,t1
> +** ...
> +** vs4r.v\tv24,0\(sp\)
> +** ...
> +** vl4re8.v\tv8,0\(sp\)
> +** vs4r.v\tv8,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_6 (int8_t *in, int8_t *out)
> +{
> + register vint8m4_t v4 asm("v4") = *(vint8m4_t*)in;
> + asm volatile ("# %0"::"vr"(v4));
> + exhaust_vector_regs ();
> + register vint8m4_t v8 asm("v8") = v4;
> + *(vint8m4_t*)out = v8;
> + asm volatile ("# %0"::"vr"(v8));
> +}
> +
> +/*
> +** spill_7:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,3
> +** sub\tsp,sp,t1
> +** ...
> +** vs8r.v\tv24,0\(sp\)
> +** ...
> +** vl8re8.v\tv16,0\(sp\)
> +** vs8r.v\tv16,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_7 (int8_t *in, int8_t *out)
> +{
> + register vint8m8_t v8 asm("v8") = *(vint8m8_t*)in;
> + asm volatile ("# %0"::"vr"(v8));
> + exhaust_vector_regs ();
> + register vint8m8_t v16 asm("v16") = v8;
> + *(vint8m8_t*)out = v16;
> + asm volatile ("# %0"::"vr"(v16));
> +}
> +
> +/*
> +** spill_8:
> +** csrr\tt0,vlenb
> +** sub\tsp,sp,t0
> +** vsetvli\ta5,zero,e8,mf8,ta,ma
> +** vle8.v\tv24,0\(a0\)
> +** csrr\ta2,vlenb
> +** srli\ta2,a2,3
> +** slli\ta3,a2,3
> +** sub\ta3,a3,a2
> +** add\ta3,a3,sp
> +** vse8.v\tv24,0\(a3\)
> +** ...
> +** csrr\ta2,vlenb
> +** srli\ta2,a2,3
> +** slli\ta3,a2,3
> +** sub\ta3,a3,a2
> +** add\ta3,a3,sp
> +** vle8.v\tv24,0\(a3\)
> +** vse8.v\tv24,0\(a1\)
> +** csrr\tt0,vlenb
> +** add\tsp,sp,t0
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_8 (uint8_t *in, uint8_t *out)
> +{
> + vuint8mf8_t v1 = *(vuint8mf8_t*)in;
> + exhaust_vector_regs ();
> + *(vuint8mf8_t*)out = v1;
> +}
> +
> +/*
> +** spill_9:
> +** csrr\tt0,vlenb
> +** sub\tsp,sp,t0
> +** vsetvli\ta5,zero,e8,mf4,ta,ma
> +** vle8.v\tv24,0\(a0\)
> +** csrr\ta2,vlenb
> +** srli\ta2,a2,2
> +** slli\ta3,a2,2
> +** sub\ta3,a3,a2
> +** add\ta3,a3,sp
> +** vse8.v\tv24,0\(a3\)
> +** ...
> +** csrr\ta2,vlenb
> +** srli\ta2,a2,2
> +** slli\ta3,a2,2
> +** sub\ta3,a3,a2
> +** add\ta3,a3,sp
> +** vle8.v\tv24,0\(a3\)
> +** vse8.v\tv24,0\(a1\)
> +** csrr\tt0,vlenb
> +** add\tsp,sp,t0
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_9 (uint8_t *in, uint8_t *out)
> +{
> + vuint8mf4_t v1 = *(vuint8mf4_t*)in;
> + exhaust_vector_regs ();
> + *(vuint8mf4_t*)out = v1;
> +}
> +
> +/*
> +** spill_10:
> +** csrr\tt0,vlenb
> +** sub\tsp,sp,t0
> +** vsetvli\ta5,zero,e8,mf2,ta,ma
> +** vle8.v\tv24,0\(a0\)
> +** csrr\ta3,vlenb
> +** srli\ta3,a3,1
> +** add\ta3,a3,sp
> +** vse8.v\tv24,0\(a3\)
> +** ...
> +** csrr\ta3,vlenb
> +** srli\ta3,a3,1
> +** add\ta3,a3,sp
> +** vle8.v\tv24,0\(a3\)
> +** vse8.v\tv24,0\(a1\)
> +** csrr\tt0,vlenb
> +** add\tsp,sp,t0
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_10 (uint8_t *in, uint8_t *out)
> +{
> + vuint8mf2_t v1 = *(vuint8mf2_t*)in;
> + exhaust_vector_regs ();
> + *(vuint8mf2_t*)out = v1;
> +}
> +
> +/*
> +** spill_11:
> +** csrr\tt0,vlenb
> +** sub\tsp,sp,t0
> +** ...
> +** vs1r.v\tv24,0\(sp\)
> +** ...
> +** vl1re8.v\tv2,0\(sp\)
> +** vs1r.v\tv2,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_11 (uint8_t *in, uint8_t *out)
> +{
> + register vuint8m1_t v1 asm("v1") = *(vuint8m1_t*)in;
> + asm volatile ("# %0"::"vr"(v1));
> + exhaust_vector_regs ();
> + register vuint8m1_t v2 asm("v2") = v1;
> + *(vuint8m1_t*)out = v2;
> + asm volatile ("# %0"::"vr"(v2));
> +}
> +
> +/*
> +** spill_12:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,1
> +** sub\tsp,sp,t1
> +** ...
> +** vs2r.v\tv24,0\(sp\)
> +** ...
> +** vl2re8.v\tv4,0\(sp\)
> +** vs2r.v\tv4,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_12 (uint8_t *in, uint8_t *out)
> +{
> + register vuint8m2_t v2 asm("v2") = *(vuint8m2_t*)in;
> + asm volatile ("# %0"::"vr"(v2));
> + exhaust_vector_regs ();
> + register vuint8m2_t v4 asm("v4") = v2;
> + *(vuint8m2_t*)out = v4;
> + asm volatile ("# %0"::"vr"(v4));
> +}
> +
> +/*
> +** spill_13:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,2
> +** sub\tsp,sp,t1
> +** ...
> +** vs4r.v\tv24,0\(sp\)
> +** ...
> +** vl4re8.v\tv8,0\(sp\)
> +** vs4r.v\tv8,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_13 (uint8_t *in, uint8_t *out)
> +{
> + register vuint8m4_t v4 asm("v4") = *(vuint8m4_t*)in;
> + asm volatile ("# %0"::"vr"(v4));
> + exhaust_vector_regs ();
> + register vuint8m4_t v8 asm("v8") = v4;
> + *(vuint8m4_t*)out = v8;
> + asm volatile ("# %0"::"vr"(v8));
> +}
> +
> +/*
> +** spill_14:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,3
> +** sub\tsp,sp,t1
> +** ...
> +** vs8r.v\tv24,0\(sp\)
> +** ...
> +** vl8re8.v\tv16,0\(sp\)
> +** vs8r.v\tv16,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_14 (uint8_t *in, uint8_t *out)
> +{
> + register vuint8m8_t v8 asm("v8") = *(vuint8m8_t*)in;
> + asm volatile ("# %0"::"vr"(v8));
> + exhaust_vector_regs ();
> + register vuint8m8_t v16 asm("v16") = v8;
> + *(vuint8m8_t*)out = v16;
> + asm volatile ("# %0"::"vr"(v16));
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c
> new file mode 100644
> index 00000000000..d37857e24ab
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c
> @@ -0,0 +1,41 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -fno-schedule-insns -fno-schedule-insns2 -O3" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include "riscv_vector.h"
> +
> +void f (char*);
> +
> +/*
> +** stach_check_alloca_1:
> +** addi\tsp,sp,-32
> +** sw\tra,4\(sp\)
> +** sw\ts0,0\(sp\)
> +** addi\ts0,sp,8
> +** csrr\tt0,vlenb
> +** sub\tsp,sp,t0
> +** ...
> +** addi\ta2,a2,15
> +** andi\ta2,a2,-8
> +** sub\tsp,sp,a2
> +** ...
> +** lw\tra,4\(sp\)
> +** lw\ts0,0\(sp\)
> +** addi\tsp,sp,32
> +** jr\tra
> +*/
> +void stach_check_alloca_1 (vuint8m1_t data, uint8_t *base, int y, ...)
> +{
> + vuint8m8_t v0, v8, v16, v24;
> + asm volatile ("nop"
> + : "=vr" (v0), "=vr" (v8), "=vr" (v16), "=vr" (v24)
> + :
> + :);
> + asm volatile ("nop"
> + :
> + : "vr" (v0), "vr" (v8), "vr" (v16), "vr" (v24)
> + :);
> + *(vuint8m1_t *)base = data;
> + char* pStr = (char*)__builtin_alloca(y);
> + f(pStr);
> +}
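
The point of this test is the ordering: the frame pointer is set up first (addi s0,sp,8), then the scalable part is carved out with csrr t0,vlenb; sub sp,sp,t0, and only then does the alloca adjustment run — so spill slots remain addressable from the frame pointer while sp keeps moving for the alloca.
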
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-11.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-11.c
> new file mode 100644
> index 00000000000..c2f68b86d90
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-11.c
> @@ -0,0 +1,60 @@
> +/* { dg-do compile } */
> +/* { dg-options "-msave-restore -march=rv32gcv -mabi=ilp32 -msave-restore -fno-schedule-insns -fno-schedule-insns2 -O3" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +#include "riscv_vector.h"
> +
> +void fn2 (float a1, float a2, float a3, float a4,
> + float a5, float a6, float a7, float a8);
> +void fn3 (char*);
> +
> +/*
> +** stack_save_restore_2:
> +** call\tt0,__riscv_save_2
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,1
> +** sub\tsp,sp,t1
> +** li\tt0,-8192
> +** addi\tt0,t0,192
> +** add\tsp,sp,t0
> +** ...
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,1
> +** add\tsp,sp,t1
> +** li\tt0,8192
> +** addi\tt0,t0,-208
> +** add\tsp,sp,t0
> +** addi\tsp,sp,16
> +** tail\t__riscv_restore_2
> +*/
> +int stack_save_restore_2 (float a1, float a2, float a3, float a4,
> + float a5, float a6, float a7, float a8,
> + vuint8m1_t data, uint8_t *base)
> +{
> + char d[8000];
> + float f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13;
> + asm volatile ("nop"
> + : "=f" (f1), "=f" (f2), "=f" (f3), "=f" (f4), "=f" (f5), "=f" (f6),
> + "=f" (f7), "=f" (f8), "=f" (f9), "=f" (f10), "=f" (f11),
> + "=f" (f12), "=f" (f13)
> + :
> + :);
> + asm volatile ("nop"
> + :
> + : "f" (f1), "f" (f2), "f" (f3), "f" (f4), "f" (f5), "f" (f6),
> + "f" (f7), "f" (f8), "f" (f9), "f" (f10), "f" (f11),
> + "f" (f12), "f" (f13)
> + :);
> + vuint8m8_t v0, v8, v16, v24;
> + asm volatile ("nop"
> + : "=vr" (v0), "=vr" (v8), "=vr" (v16), "=vr" (v24)
> + :
> + :);
> + asm volatile ("nop"
> + :
> + : "vr" (v0), "vr" (v8), "vr" (v16), "vr" (v24)
> + :);
> + *(vuint8m1_t *)base = data;
> + fn2 (a1, a2, a3, a4, a5, a6, a7, a8);
> + fn3(d);
> + return 0;
> +}
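
Three independent adjustments are visible here, in order: the __riscv_save_2 call, the scalable part (2 x VLENB via slli t1,t0,1), and the large constant part, which no longer fits a 12-bit immediate and is built as li t0,-8192; addi t0,t0,192 (= -8000 for the 8000-byte buffer). The epilogue undoes them in reverse (2 x VLENB, then 8192 - 208 + 16 = 8000 bytes) before tail-calling __riscv_restore_2.
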
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-12.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-12.c
> new file mode 100644
> index 00000000000..de6e0604a3c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-12.c
> @@ -0,0 +1,47 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -msave-restore -fno-schedule-insns -fno-schedule-insns2 -O3" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +
> +void fn2 (float a1, float a2, float a3, float a4,
> + float a5, float a6, float a7, float a8);
> +void fn3 (char*);
> +
> +
> +/*
> +** stack_save_restore_1:
> +** call\tt0,__riscv_save_0
> +** li\tt0,-8192
> +** addi\tt0,t0,192
> +** add\tsp,sp,t0
> +** ...
> +** li\ta0,-8192
> +** addi\ta0,a0,192
> +** li\ta5,8192
> +** addi\ta5,a5,-192
> +** add\ta5,a5,a0
> +** add\ta0,a5,sp
> +** ...
> +** tail\t__riscv_restore_0
> +*/
> +int stack_save_restore_1 (float a1, float a2, float a3, float a4,
> + float a5, float a6, float a7, float a8)
> +{
> + char d[8000];
> + float f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13;
> + asm volatile ("nop"
> + : "=f" (f1), "=f" (f2), "=f" (f3), "=f" (f4), "=f" (f5), "=f" (f6),
> + "=f" (f7), "=f" (f8), "=f" (f9), "=f" (f10), "=f" (f11),
> + "=f" (f12), "=f" (f13)
> + :
> + :);
> + asm volatile ("nop"
> + :
> + : "f" (f1), "f" (f2), "f" (f3), "f" (f4), "f" (f5), "f" (f6),
> + "f" (f7), "f" (f8), "f" (f9), "f" (f10), "f" (f11),
> + "f" (f12), "f" (f13)
> + :);
> + fn2 (a1, a2, a3, a4, a5, a6, a7, a8);
> + fn3(d);
> + return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c
> new file mode 100644
> index 00000000000..ca1904b830d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c
> @@ -0,0 +1,320 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include "riscv_vector.h"
> +#include "macro.h"
> +
> +/*
> +** spill_2:
> +** csrr\tt0,vlenb
> +** sub\tsp,sp,t0
> +** vsetvli\ta5,zero,e16,mf4,ta,ma
> +** vle16.v\tv24,0\(a0\)
> +** csrr\ta2,vlenb
> +** srli\ta2,a2,2
> +** slli\ta3,a2,2
> +** sub\ta3,a3,a2
> +** add\ta3,a3,sp
> +** vse16.v\tv24,0\(a3\)
> +** ...
> +** csrr\ta2,vlenb
> +** srli\ta2,a2,2
> +** slli\ta3,a2,2
> +** sub\ta3,a3,a2
> +** add\ta3,a3,sp
> +** vle16.v\tv24,0\(a3\)
> +** vse16.v\tv24,0\(a1\)
> +** csrr\tt0,vlenb
> +** add\tsp,sp,t0
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_2 (int16_t *in, int16_t *out)
> +{
> + vint16mf4_t v1 = *(vint16mf4_t*)in;
> + exhaust_vector_regs ();
> + *(vint16mf4_t*)out = v1;
> +}
> +
> +/*
> +** spill_3:
> +** csrr\tt0,vlenb
> +** sub\tsp,sp,t0
> +** vsetvli\ta5,zero,e16,mf2,ta,ma
> +** vle16.v\tv24,0\(a0\)
> +** csrr\ta3,vlenb
> +** srli\ta3,a3,1
> +** add\ta3,a3,sp
> +** vse16.v\tv24,0\(a3\)
> +** ...
> +** csrr\ta3,vlenb
> +** srli\ta3,a3,1
> +** add\ta3,a3,sp
> +** vle16.v\tv24,0\(a3\)
> +** vse16.v\tv24,0\(a1\)
> +** csrr\tt0,vlenb
> +** add\tsp,sp,t0
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_3 (int16_t *in, int16_t *out)
> +{
> + vint16mf2_t v1 = *(vint16mf2_t*)in;
> + exhaust_vector_regs ();
> + *(vint16mf2_t*)out = v1;
> +}
> +
> +/*
> +** spill_4:
> +** csrr\tt0,vlenb
> +** sub\tsp,sp,t0
> +** ...
> +** vs1r.v\tv24,0\(sp\)
> +** ...
> +** vl1re16.v\tv2,0\(sp\)
> +** vs1r.v\tv2,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_4 (int16_t *in, int16_t *out)
> +{
> + register vint16m1_t v1 asm("v1") = *(vint16m1_t*)in;
> + asm volatile ("# %0"::"vr"(v1));
> + exhaust_vector_regs ();
> + register vint16m1_t v2 asm("v2") = v1;
> + *(vint16m1_t*)out = v2;
> + asm volatile ("# %0"::"vr"(v2));
> +}
> +
> +/*
> +** spill_5:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,1
> +** sub\tsp,sp,t1
> +** ...
> +** vs2r.v\tv24,0\(sp\)
> +** ...
> +** vl2re16.v\tv4,0\(sp\)
> +** vs2r.v\tv4,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_5 (int16_t *in, int16_t *out)
> +{
> + register vint16m2_t v2 asm("v2") = *(vint16m2_t*)in;
> + asm volatile ("# %0"::"vr"(v2));
> + exhaust_vector_regs ();
> + register vint16m2_t v4 asm("v4") = v2;
> + *(vint16m2_t*)out = v4;
> + asm volatile ("# %0"::"vr"(v4));
> +}
> +
> +/*
> +** spill_6:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,2
> +** sub\tsp,sp,t1
> +** ...
> +** vs4r.v\tv24,0\(sp\)
> +** ...
> +** vl4re16.v\tv8,0\(sp\)
> +** vs4r.v\tv8,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_6 (int16_t *in, int16_t *out)
> +{
> + register vint16m4_t v4 asm("v4") = *(vint16m4_t*)in;
> + asm volatile ("# %0"::"vr"(v4));
> + exhaust_vector_regs ();
> + register vint16m4_t v8 asm("v8") = v4;
> + *(vint16m4_t*)out = v8;
> + asm volatile ("# %0"::"vr"(v8));
> +}
> +
> +/*
> +** spill_7:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,3
> +** sub\tsp,sp,t1
> +** ...
> +** vs8r.v\tv24,0\(sp\)
> +** ...
> +** vl8re16.v\tv16,0\(sp\)
> +** vs8r.v\tv16,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_7 (int16_t *in, int16_t *out)
> +{
> + register vint16m8_t v8 asm("v8") = *(vint16m8_t*)in;
> + asm volatile ("# %0"::"vr"(v8));
> + exhaust_vector_regs ();
> + register vint16m8_t v16 asm("v16") = v8;
> + *(vint16m8_t*)out = v16;
> + asm volatile ("# %0"::"vr"(v16));
> +}
> +
> +/*
> +** spill_9:
> +** csrr\tt0,vlenb
> +** sub\tsp,sp,t0
> +** vsetvli\ta5,zero,e16,mf4,ta,ma
> +** vle16.v\tv24,0\(a0\)
> +** csrr\ta2,vlenb
> +** srli\ta2,a2,2
> +** slli\ta3,a2,2
> +** sub\ta3,a3,a2
> +** add\ta3,a3,sp
> +** vse16.v\tv24,0\(a3\)
> +** ...
> +** csrr\ta2,vlenb
> +** srli\ta2,a2,2
> +** slli\ta3,a2,2
> +** sub\ta3,a3,a2
> +** add\ta3,a3,sp
> +** vle16.v\tv24,0\(a3\)
> +** vse16.v\tv24,0\(a1\)
> +** csrr\tt0,vlenb
> +** add\tsp,sp,t0
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_9 (uint16_t *in, uint16_t *out)
> +{
> + vuint16mf4_t v1 = *(vuint16mf4_t*)in;
> + exhaust_vector_regs ();
> + *(vuint16mf4_t*)out = v1;
> +}
> +
> +/*
> +** spill_10:
> +** csrr\tt0,vlenb
> +** sub\tsp,sp,t0
> +** vsetvli\ta5,zero,e16,mf2,ta,ma
> +** vle16.v\tv24,0\(a0\)
> +** csrr\ta3,vlenb
> +** srli\ta3,a3,1
> +** add\ta3,a3,sp
> +** vse16.v\tv24,0\(a3\)
> +** ...
> +** csrr\ta3,vlenb
> +** srli\ta3,a3,1
> +** add\ta3,a3,sp
> +** vle16.v\tv24,0\(a3\)
> +** vse16.v\tv24,0\(a1\)
> +** csrr\tt0,vlenb
> +** add\tsp,sp,t0
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_10 (uint16_t *in, uint16_t *out)
> +{
> + vuint16mf2_t v1 = *(vuint16mf2_t*)in;
> + exhaust_vector_regs ();
> + *(vuint16mf2_t*)out = v1;
> +}
> +
> +/*
> +** spill_11:
> +** csrr\tt0,vlenb
> +** sub\tsp,sp,t0
> +** ...
> +** vs1r.v\tv24,0\(sp\)
> +** ...
> +** vl1re16.v\tv2,0\(sp\)
> +** vs1r.v\tv2,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_11 (uint16_t *in, uint16_t *out)
> +{
> + register vuint16m1_t v1 asm("v1") = *(vuint16m1_t*)in;
> + asm volatile ("# %0"::"vr"(v1));
> + exhaust_vector_regs ();
> + register vuint16m1_t v2 asm("v2") = v1;
> + *(vuint16m1_t*)out = v2;
> + asm volatile ("# %0"::"vr"(v2));
> +}
> +
> +/*
> +** spill_12:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,1
> +** sub\tsp,sp,t1
> +** ...
> +** vs2r.v\tv24,0\(sp\)
> +** ...
> +** vl2re16.v\tv4,0\(sp\)
> +** vs2r.v\tv4,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_12 (uint16_t *in, uint16_t *out)
> +{
> + register vuint16m2_t v2 asm("v2") = *(vuint16m2_t*)in;
> + asm volatile ("# %0"::"vr"(v2));
> + exhaust_vector_regs ();
> + register vuint16m2_t v4 asm("v4") = v2;
> + *(vuint16m2_t*)out = v4;
> + asm volatile ("# %0"::"vr"(v4));
> +}
> +
> +/*
> +** spill_13:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,2
> +** sub\tsp,sp,t1
> +** ...
> +** vs4r.v\tv24,0\(sp\)
> +** ...
> +** vl4re16.v\tv8,0\(sp\)
> +** vs4r.v\tv8,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_13 (uint16_t *in, uint16_t *out)
> +{
> + register vuint16m4_t v4 asm("v4") = *(vuint16m4_t*)in;
> + asm volatile ("# %0"::"vr"(v4));
> + exhaust_vector_regs ();
> + register vuint16m4_t v8 asm("v8") = v4;
> + *(vuint16m4_t*)out = v8;
> + asm volatile ("# %0"::"vr"(v8));
> +}
> +
> +/*
> +** spill_14:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,3
> +** sub\tsp,sp,t1
> +** ...
> +** vs8r.v\tv24,0\(sp\)
> +** ...
> +** vl8re16.v\tv16,0\(sp\)
> +** vs8r.v\tv16,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_14 (uint16_t *in, uint16_t *out)
> +{
> + register vuint16m8_t v8 asm("v8") = *(vuint16m8_t*)in;
> + asm volatile ("# %0"::"vr"(v8));
> + exhaust_vector_regs ();
> + register vuint16m8_t v16 asm("v16") = v8;
> + *(vuint16m8_t*)out = v16;
> + asm volatile ("# %0"::"vr"(v16));
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c
> new file mode 100644
> index 00000000000..2039ca34516
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c
> @@ -0,0 +1,254 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include "riscv_vector.h"
> +#include "macro.h"
> +
> +/*
> +** spill_3:
> +** csrr\tt0,vlenb
> +** sub\tsp,sp,t0
> +** vsetvli\ta5,zero,e32,mf2,ta,ma
> +** vle32.v\tv24,0\(a0\)
> +** csrr\ta3,vlenb
> +** srli\ta3,a3,1
> +** add\ta3,a3,sp
> +** vse32.v\tv24,0\(a3\)
> +** ...
> +** csrr\ta3,vlenb
> +** srli\ta3,a3,1
> +** add\ta3,a3,sp
> +** vle32.v\tv24,0\(a3\)
> +** vse32.v\tv24,0\(a1\)
> +** csrr\tt0,vlenb
> +** add\tsp,sp,t0
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_3 (int32_t *in, int32_t *out)
> +{
> + vint32mf2_t v1 = *(vint32mf2_t*)in;
> + exhaust_vector_regs ();
> + *(vint32mf2_t*)out = v1;
> +}
> +
> +/*
> +** spill_4:
> +** csrr\tt0,vlenb
> +** sub\tsp,sp,t0
> +** ...
> +** vs1r.v\tv24,0\(sp\)
> +** ...
> +** vl1re32.v\tv2,0\(sp\)
> +** vs1r.v\tv2,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_4 (int32_t *in, int32_t *out)
> +{
> + register vint32m1_t v1 asm("v1") = *(vint32m1_t*)in;
> + asm volatile ("# %0"::"vr"(v1));
> + exhaust_vector_regs ();
> + register vint32m1_t v2 asm("v2") = v1;
> + *(vint32m1_t*)out = v2;
> + asm volatile ("# %0"::"vr"(v2));
> +}
> +
> +/*
> +** spill_5:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,1
> +** sub\tsp,sp,t1
> +** ...
> +** vs2r.v\tv24,0\(sp\)
> +** ...
> +** vl2re32.v\tv4,0\(sp\)
> +** vs2r.v\tv4,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_5 (int32_t *in, int32_t *out)
> +{
> + register vint32m2_t v2 asm("v2") = *(vint32m2_t*)in;
> + asm volatile ("# %0"::"vr"(v2));
> + exhaust_vector_regs ();
> + register vint32m2_t v4 asm("v4") = v2;
> + *(vint32m2_t*)out = v4;
> + asm volatile ("# %0"::"vr"(v4));
> +}
> +
> +/*
> +** spill_6:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,2
> +** sub\tsp,sp,t1
> +** ...
> +** vs4r.v\tv24,0\(sp\)
> +** ...
> +** vl4re32.v\tv8,0\(sp\)
> +** vs4r.v\tv8,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_6 (int32_t *in, int32_t *out)
> +{
> + register vint32m4_t v4 asm("v4") = *(vint32m4_t*)in;
> + asm volatile ("# %0"::"vr"(v4));
> + exhaust_vector_regs ();
> + register vint32m4_t v8 asm("v8") = v4;
> + *(vint32m4_t*)out = v8;
> + asm volatile ("# %0"::"vr"(v8));
> +}
> +
> +/*
> +** spill_7:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,3
> +** sub\tsp,sp,t1
> +** ...
> +** vs8r.v\tv24,0\(sp\)
> +** ...
> +** vl8re32.v\tv16,0\(sp\)
> +** vs8r.v\tv16,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_7 (int32_t *in, int32_t *out)
> +{
> + register vint32m8_t v8 asm("v8") = *(vint32m8_t*)in;
> + asm volatile ("# %0"::"vr"(v8));
> + exhaust_vector_regs ();
> + register vint32m8_t v16 asm("v16") = v8;
> + *(vint32m8_t*)out = v16;
> + asm volatile ("# %0"::"vr"(v16));
> +}
> +
> +/*
> +** spill_10:
> +** csrr\tt0,vlenb
> +** sub\tsp,sp,t0
> +** vsetvli\ta5,zero,e32,mf2,ta,ma
> +** vle32.v\tv24,0\(a0\)
> +** csrr\ta3,vlenb
> +** srli\ta3,a3,1
> +** add\ta3,a3,sp
> +** vse32.v\tv24,0\(a3\)
> +** ...
> +** csrr\ta3,vlenb
> +** srli\ta3,a3,1
> +** add\ta3,a3,sp
> +** vle32.v\tv24,0\(a3\)
> +** vse32.v\tv24,0\(a1\)
> +** csrr\tt0,vlenb
> +** add\tsp,sp,t0
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_10 (uint32_t *in, uint32_t *out)
> +{
> + vuint32mf2_t v1 = *(vuint32mf2_t*)in;
> + exhaust_vector_regs ();
> + *(vuint32mf2_t*)out = v1;
> +}
> +
> +/*
> +** spill_11:
> +** csrr\tt0,vlenb
> +** sub\tsp,sp,t0
> +** ...
> +** vs1r.v\tv24,0\(sp\)
> +** ...
> +** vl1re32.v\tv2,0\(sp\)
> +** vs1r.v\tv2,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_11 (uint32_t *in, uint32_t *out)
> +{
> + register vuint32m1_t v1 asm("v1") = *(vuint32m1_t*)in;
> + asm volatile ("# %0"::"vr"(v1));
> + exhaust_vector_regs ();
> + register vuint32m1_t v2 asm("v2") = v1;
> + *(vuint32m1_t*)out = v2;
> + asm volatile ("# %0"::"vr"(v2));
> +}
> +
> +/*
> +** spill_12:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,1
> +** sub\tsp,sp,t1
> +** ...
> +** vs2r.v\tv24,0\(sp\)
> +** ...
> +** vl2re32.v\tv4,0\(sp\)
> +** vs2r.v\tv4,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_12 (uint32_t *in, uint32_t *out)
> +{
> + register vuint32m2_t v2 asm("v2") = *(vuint32m2_t*)in;
> + asm volatile ("# %0"::"vr"(v2));
> + exhaust_vector_regs ();
> + register vuint32m2_t v4 asm("v4") = v2;
> + *(vuint32m2_t*)out = v4;
> + asm volatile ("# %0"::"vr"(v4));
> +}
> +
> +/*
> +** spill_13:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,2
> +** sub\tsp,sp,t1
> +** ...
> +** vs4r.v\tv24,0\(sp\)
> +** ...
> +** vl4re32.v\tv8,0\(sp\)
> +** vs4r.v\tv8,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_13 (uint32_t *in, uint32_t *out)
> +{
> + register vuint32m4_t v4 asm("v4") = *(vuint32m4_t*)in;
> + asm volatile ("# %0"::"vr"(v4));
> + exhaust_vector_regs ();
> + register vuint32m4_t v8 asm("v8") = v4;
> + *(vuint32m4_t*)out = v8;
> + asm volatile ("# %0"::"vr"(v8));
> +}
> +
> +/*
> +** spill_14:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,3
> +** sub\tsp,sp,t1
> +** ...
> +** vs8r.v\tv24,0\(sp\)
> +** ...
> +** vl8re32.v\tv16,0\(sp\)
> +** vs8r.v\tv16,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_14 (uint32_t *in, uint32_t *out)
> +{
> + register vuint32m8_t v8 asm("v8") = *(vuint32m8_t*)in;
> + asm volatile ("# %0"::"vr"(v8));
> + exhaust_vector_regs ();
> + register vuint32m8_t v16 asm("v16") = v8;
> + *(vuint32m8_t*)out = v16;
> + asm volatile ("# %0"::"vr"(v16));
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-4.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-4.c
> new file mode 100644
> index 00000000000..83c80b0b045
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-4.c
> @@ -0,0 +1,196 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include "riscv_vector.h"
> +#include "macro.h"
> +
> +/*
> +** spill_4:
> +** csrr\tt0,vlenb
> +** sub\tsp,sp,t0
> +** ...
> +** vs1r.v\tv24,0\(sp\)
> +** ...
> +** vl1re64.v\tv2,0\(sp\)
> +** vs1r.v\tv2,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_4 (int64_t *in, int64_t *out)
> +{
> + register vint64m1_t v1 asm("v1") = *(vint64m1_t*)in;
> + asm volatile ("# %0"::"vr"(v1));
> + exhaust_vector_regs ();
> + register vint64m1_t v2 asm("v2") = v1;
> + *(vint64m1_t*)out = v2;
> + asm volatile ("# %0"::"vr"(v2));
> +}
> +
> +/*
> +** spill_5:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,1
> +** sub\tsp,sp,t1
> +** ...
> +** vs2r.v\tv24,0\(sp\)
> +** ...
> +** vl2re64.v\tv4,0\(sp\)
> +** vs2r.v\tv4,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_5 (int64_t *in, int64_t *out)
> +{
> + register vint64m2_t v2 asm("v2") = *(vint64m2_t*)in;
> + asm volatile ("# %0"::"vr"(v2));
> + exhaust_vector_regs ();
> + register vint64m2_t v4 asm("v4") = v2;
> + *(vint64m2_t*)out = v4;
> + asm volatile ("# %0"::"vr"(v4));
> +}
> +
> +/*
> +** spill_6:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,2
> +** sub\tsp,sp,t1
> +** ...
> +** vs4r.v\tv24,0\(sp\)
> +** ...
> +** vl4re64.v\tv8,0\(sp\)
> +** vs4r.v\tv8,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_6 (int64_t *in, int64_t *out)
> +{
> + register vint64m4_t v4 asm("v4") = *(vint64m4_t*)in;
> + asm volatile ("# %0"::"vr"(v4));
> + exhaust_vector_regs ();
> + register vint64m4_t v8 asm("v8") = v4;
> + *(vint64m4_t*)out = v8;
> + asm volatile ("# %0"::"vr"(v8));
> +}
> +
> +/*
> +** spill_7:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,3
> +** sub\tsp,sp,t1
> +** ...
> +** vs8r.v\tv24,0\(sp\)
> +** ...
> +** vl8re64.v\tv16,0\(sp\)
> +** vs8r.v\tv16,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_7 (int64_t *in, int64_t *out)
> +{
> + register vint64m8_t v8 asm("v8") = *(vint64m8_t*)in;
> + asm volatile ("# %0"::"vr"(v8));
> + exhaust_vector_regs ();
> + register vint64m8_t v16 asm("v16") = v8;
> + *(vint64m8_t*)out = v16;
> + asm volatile ("# %0"::"vr"(v16));
> +}
> +
> +/*
> +** spill_11:
> +** csrr\tt0,vlenb
> +** sub\tsp,sp,t0
> +** ...
> +** vs1r.v\tv24,0\(sp\)
> +** ...
> +** vl1re64.v\tv2,0\(sp\)
> +** vs1r.v\tv2,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_11 (uint64_t *in, uint64_t *out)
> +{
> + register vuint64m1_t v1 asm("v1") = *(vuint64m1_t*)in;
> + asm volatile ("# %0"::"vr"(v1));
> + exhaust_vector_regs ();
> + register vuint64m1_t v2 asm("v2") = v1;
> + *(vuint64m1_t*)out = v2;
> + asm volatile ("# %0"::"vr"(v2));
> +}
> +
> +/*
> +** spill_12:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,1
> +** sub\tsp,sp,t1
> +** ...
> +** vs2r.v\tv24,0\(sp\)
> +** ...
> +** vl2re64.v\tv4,0\(sp\)
> +** vs2r.v\tv4,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_12 (uint64_t *in, uint64_t *out)
> +{
> + register vuint64m2_t v2 asm("v2") = *(vuint64m2_t*)in;
> + asm volatile ("# %0"::"vr"(v2));
> + exhaust_vector_regs ();
> + register vuint64m2_t v4 asm("v4") = v2;
> + *(vuint64m2_t*)out = v4;
> + asm volatile ("# %0"::"vr"(v4));
> +}
> +
> +/*
> +** spill_13:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,2
> +** sub\tsp,sp,t1
> +** ...
> +** vs4r.v\tv24,0\(sp\)
> +** ...
> +** vl4re64.v\tv8,0\(sp\)
> +** vs4r.v\tv8,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_13 (uint64_t *in, uint64_t *out)
> +{
> + register vuint64m4_t v4 asm("v4") = *(vuint64m4_t*)in;
> + asm volatile ("# %0"::"vr"(v4));
> + exhaust_vector_regs ();
> + register vuint64m4_t v8 asm("v8") = v4;
> + *(vuint64m4_t*)out = v8;
> + asm volatile ("# %0"::"vr"(v8));
> +}
> +
> +/*
> +** spill_14:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,3
> +** sub\tsp,sp,t1
> +** ...
> +** vs8r.v\tv24,0\(sp\)
> +** ...
> +** vl8re64.v\tv16,0\(sp\)
> +** vs8r.v\tv16,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_14 (uint64_t *in, uint64_t *out)
> +{
> + register vuint64m8_t v8 asm("v8") = *(vuint64m8_t*)in;
> + asm volatile ("# %0"::"vr"(v8));
> + exhaust_vector_regs ();
> + register vuint64m8_t v16 asm("v16") = v8;
> + *(vuint64m8_t*)out = v16;
> + asm volatile ("# %0"::"vr"(v16));
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c
> new file mode 100644
> index 00000000000..3c228a00c48
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c
> @@ -0,0 +1,130 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include "riscv_vector.h"
> +#include "macro.h"
> +
> +/*
> +** spill_3:
> +** csrr\tt0,vlenb
> +** sub\tsp,sp,t0
> +** vsetvli\ta5,zero,e32,mf2,ta,ma
> +** vle32.v\tv24,0\(a0\)
> +** csrr\ta3,vlenb
> +** srli\ta3,a3,1
> +** add\ta3,a3,sp
> +** vse32.v\tv24,0\(a3\)
> +** ...
> +** csrr\ta3,vlenb
> +** srli\ta3,a3,1
> +** add\ta3,a3,sp
> +** vle32.v\tv24,0\(a3\)
> +** vse32.v\tv24,0\(a1\)
> +** csrr\tt0,vlenb
> +** add\tsp,sp,t0
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_3 (float *in, float *out)
> +{
> + vfloat32mf2_t v1 = *(vfloat32mf2_t*)in;
> + exhaust_vector_regs ();
> + *(vfloat32mf2_t*)out = v1;
> +}
> +
> +/*
> +** spill_4:
> +** csrr\tt0,vlenb
> +** sub\tsp,sp,t0
> +** ...
> +** vs1r.v\tv24,0\(sp\)
> +** ...
> +** vl1re32.v\tv2,0\(sp\)
> +** vs1r.v\tv2,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_4 (float *in, float *out)
> +{
> + register vfloat32m1_t v1 asm("v1") = *(vfloat32m1_t*)in;
> + asm volatile ("# %0"::"vr"(v1));
> + exhaust_vector_regs ();
> + register vfloat32m1_t v2 asm("v2") = v1;
> + *(vfloat32m1_t*)out = v2;
> + asm volatile ("# %0"::"vr"(v2));
> +}
> +
> +/*
> +** spill_5:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,1
> +** sub\tsp,sp,t1
> +** ...
> +** vs2r.v\tv24,0\(sp\)
> +** ...
> +** vl2re32.v\tv4,0\(sp\)
> +** vs2r.v\tv4,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_5 (float *in, float *out)
> +{
> + register vfloat32m2_t v2 asm("v2") = *(vfloat32m2_t*)in;
> + asm volatile ("# %0"::"vr"(v2));
> + exhaust_vector_regs ();
> + register vfloat32m2_t v4 asm("v4") = v2;
> + *(vfloat32m2_t*)out = v4;
> + asm volatile ("# %0"::"vr"(v4));
> +}
> +
> +/*
> +** spill_6:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,2
> +** sub\tsp,sp,t1
> +** ...
> +** vs4r.v\tv24,0\(sp\)
> +** ...
> +** vl4re32.v\tv8,0\(sp\)
> +** vs4r.v\tv8,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_6 (float *in, float *out)
> +{
> + register vfloat32m4_t v4 asm("v4") = *(vfloat32m4_t*)in;
> + asm volatile ("# %0"::"vr"(v4));
> + exhaust_vector_regs ();
> + register vfloat32m4_t v8 asm("v8") = v4;
> + *(vfloat32m4_t*)out = v8;
> + asm volatile ("# %0"::"vr"(v8));
> +}
> +
> +/*
> +** spill_7:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,3
> +** sub\tsp,sp,t1
> +** ...
> +** vs8r.v\tv24,0\(sp\)
> +** ...
> +** vl8re32.v\tv16,0\(sp\)
> +** vs8r.v\tv16,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_7 (float *in, float *out)
> +{
> + register vfloat32m8_t v8 asm("v8") = *(vfloat32m8_t*)in;
> + asm volatile ("# %0"::"vr"(v8));
> + exhaust_vector_regs ();
> + register vfloat32m8_t v16 asm("v16") = v8;
> + *(vfloat32m8_t*)out = v16;
> + asm volatile ("# %0"::"vr"(v16));
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-6.c
> new file mode 100644
> index 00000000000..340029da88b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-6.c
> @@ -0,0 +1,101 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include "riscv_vector.h"
> +#include "macro.h"
> +
> +/*
> +** spill_4:
> +** csrr\tt0,vlenb
> +** sub\tsp,sp,t0
> +** ...
> +** vs1r.v\tv24,0\(sp\)
> +** ...
> +** vl1re64.v\tv2,0\(sp\)
> +** vs1r.v\tv2,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_4 (double *in, double *out)
> +{
> + register vfloat64m1_t v1 asm("v1") = *(vfloat64m1_t*)in;
> + asm volatile ("# %0"::"vr"(v1));
> + exhaust_vector_regs ();
> + register vfloat64m1_t v2 asm("v2") = v1;
> + *(vfloat64m1_t*)out = v2;
> + asm volatile ("# %0"::"vr"(v2));
> +}
> +
> +/*
> +** spill_5:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,1
> +** sub\tsp,sp,t1
> +** ...
> +** vs2r.v\tv24,0\(sp\)
> +** ...
> +** vl2re64.v\tv4,0\(sp\)
> +** vs2r.v\tv4,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_5 (double *in, double *out)
> +{
> + register vfloat64m2_t v2 asm("v2") = *(vfloat64m2_t*)in;
> + asm volatile ("# %0"::"vr"(v2));
> + exhaust_vector_regs ();
> + register vfloat64m2_t v4 asm("v4") = v2;
> + *(vfloat64m2_t*)out = v4;
> + asm volatile ("# %0"::"vr"(v4));
> +}
> +
> +/*
> +** spill_6:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,2
> +** sub\tsp,sp,t1
> +** ...
> +** vs4r.v\tv24,0\(sp\)
> +** ...
> +** vl4re64.v\tv8,0\(sp\)
> +** vs4r.v\tv8,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_6 (double *in, double *out)
> +{
> + register vfloat64m4_t v4 asm("v4") = *(vfloat64m4_t*)in;
> + asm volatile ("# %0"::"vr"(v4));
> + exhaust_vector_regs ();
> + register vfloat64m4_t v8 asm("v8") = v4;
> + *(vfloat64m4_t*)out = v8;
> + asm volatile ("# %0"::"vr"(v8));
> +}
> +
> +/*
> +** spill_7:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,3
> +** sub\tsp,sp,t1
> +** ...
> +** vs8r.v\tv24,0\(sp\)
> +** ...
> +** vl8re64.v\tv16,0\(sp\)
> +** vs8r.v\tv16,0\(a1\)
> +** ...
> +** jr\tra
> +*/
> +void
> +spill_7 (double *in, double *out)
> +{
> + register vfloat64m8_t v8 asm("v8") = *(vfloat64m8_t*)in;
> + asm volatile ("# %0"::"vr"(v8));
> + exhaust_vector_regs ();
> + register vfloat64m8_t v16 asm("v16") = v8;
> + *(vfloat64m8_t*)out = v16;
> + asm volatile ("# %0"::"vr"(v16));
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-7.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-7.c
> new file mode 100644
> index 00000000000..cf1eea2fa3f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-7.c
> @@ -0,0 +1,114 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include "riscv_vector.h"
> +#include "macro.h"
> +
> +/*
> +** spill:
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,4
> +** sub\tsp,sp,t1
> +** vsetvli\ta3,zero,e8,mf8,ta,ma
> +** vle8.v\tv24,0\(a0\)
> +** csrr\ta5,vlenb
> +** srli\ta5,a5,3
> +** add\ta5,a5,sp
> +** vse8.v\tv24,0\(a5\)
> +** addi\ta5,a0,1
> +** vsetvli\ta4,zero,e8,mf4,ta,ma
> +** vle8.v\tv24,0\(a5\)
> +** csrr\ta5,vlenb
> +** srli\ta5,a5,2
> +** add\ta5,a5,sp
> +** vse8.v\tv24,0\(a5\)
> +** addi\ta2,a0,2
> +** vsetvli\ta5,zero,e8,mf2,ta,ma
> +** vle8.v\tv24,0\(a2\)
> +** csrr\ta2,vlenb
> +** srli\ta2,a2,1
> +** add\ta2,a2,sp
> +** vse8.v\tv24,0\(a2\)
> +** addi\ta2,a0,3
> +** vl1re8.v\tv24,0\(a2\)
> +** csrr\ta2,vlenb
> +** add\ta2,a2,sp
> +** vs1r.v\tv24,0\(a2\)
> +** addi\ta2,a0,4
> +** vl2re8.v\tv24,0\(a2\)
> +** csrr\tt3,vlenb
> +** slli\ta2,t3,1
> +** add\ta2,a2,sp
> +** vs2r.v\tv24,0\(a2\)
> +** addi\ta2,a0,5
> +** vl4re8.v\tv24,0\(a2\)
> +** mv\ta2,t3
> +** slli\tt3,t3,2
> +** add\tt3,t3,sp
> +** vs4r.v\tv24,0\(t3\)
> +** addi\ta0,a0,6
> +** vl8re8.v\tv24,0\(a0\)
> +** slli\ta0,a2,3
> +** add\ta0,a0,sp
> +** vs8r.v\tv24,0\(a0\)
> +** ...
> +** srli\ta0,a2,3
> +** add\ta0,a0,sp
> +** ...
> +** vle8.v\tv27,0\(a0\)
> +** vse8.v\tv27,0\(a1\)
> +** addi\ta3,a1,1
> +** srli\ta0,a2,2
> +** add\ta0,a0,sp
> +** ...
> +** vle8.v\tv27,0\(a0\)
> +** vse8.v\tv27,0\(a3\)
> +** addi\ta4,a1,2
> +** srli\ta3,a2,1
> +** add\ta3,a3,sp
> +** ...
> +** vle8.v\tv27,0\(a3\)
> +** vse8.v\tv27,0\(a4\)
> +** addi\ta5,a1,3
> +** add\ta4,a2,sp
> +** vl1re8.v\tv25,0\(a4\)
> +** vs1r.v\tv25,0\(a5\)
> +** addi\ta5,a1,4
> +** slli\ta4,a2,1
> +** add\ta4,a4,sp
> +** vl2re8.v\tv26,0\(a4\)
> +** vs2r.v\tv26,0\(a5\)
> +** addi\ta5,a1,5
> +** vl4re8.v\tv28,0\(t3\)
> +** vs4r.v\tv28,0\(a5\)
> +** addi\ta1,a1,6
> +** slli\ta5,a2,3
> +** add\ta5,a5,sp
> +** vl8re8.v\tv24,0\(a5\)
> +** vs8r.v\tv24,0\(a1\)
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,4
> +** add\tsp,sp,t1
> +** ...
> +** jr\tra
> +*/
> +void
> +spill (int8_t *in, int8_t *out)
> +{
> + vint8mf8_t v0 = *(vint8mf8_t*)in;
> + vint8mf4_t v1 = *(vint8mf4_t*)(in + 1);
> + vint8mf2_t v2 = *(vint8mf2_t*)(in + 2);
> + vint8m1_t v3 = *(vint8m1_t*)(in + 3);
> + vint8m2_t v4 = *(vint8m2_t*)(in + 4);
> + vint8m4_t v8 = *(vint8m4_t*)(in + 5);
> + vint8m8_t v16 = *(vint8m8_t*)(in + 6);
> + exhaust_vector_regs ();
> + *(vint8mf8_t*)out = v0;
> + *(vint8mf4_t*)(out + 1) = v1;
> + *(vint8mf2_t*)(out + 2) = v2;
> + *(vint8m1_t*)(out + 3) = v3;
> + *(vint8m2_t*)(out + 4) = v4;
> + *(vint8m4_t*)(out + 5) = v8;
> + *(vint8m8_t*)(out + 6) = v16;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-8.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-8.c
> new file mode 100644
> index 00000000000..ddc36e888eb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-8.c
> @@ -0,0 +1,51 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -O3" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +void f2 (char*);
> +void f3 (char*, ...);
> +
> +/*
> +** stach_check_alloca_1:
> +** addi sp,sp,-48
> +** sw ra,12\(sp\)
> +** sw s0,8\(sp\)
> +** addi s0,sp,16
> +** ...
> +** addi a0,a0,23
> +** andi a0,a0,-16
> +** sub sp,sp,a0
> +** ...
> +** addi sp,s0,-16
> +** lw ra,12\(sp\)
> +** lw s0,8\(sp\)
> +** addi sp,sp,48
> +** jr ra
> +*/
> +void stach_check_alloca_1 (int y, ...)
> +{
> + char* pStr = (char*)__builtin_alloca(y);
> + f2(pStr);
> +}
> +
> +/*
> +** stach_check_alloca_2:
> +** addi sp,sp,-48
> +** sw ra,44\(sp\)
> +** sw s0,40\(sp\)
> +** addi s0,sp,48
> +** addi a0,a0,23
> +** andi a0,a0,-16
> +** sub sp,sp,a0
> +** ...
> +** addi sp,s0,-48
> +** lw ra,44\(sp\)
> +** lw s0,40\(sp\)
> +** addi sp,sp,48
> +** jr ra
> +*/
> +void stach_check_alloca_2 (int y)
> +{
> + char* pStr = (char*)__builtin_alloca(y);
> + f3(pStr, pStr, pStr, pStr, pStr, pStr, pStr, pStr, 2, pStr, pStr, pStr, 1);
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-9.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-9.c
> new file mode 100644
> index 00000000000..7111113d393
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-9.c
> @@ -0,0 +1,42 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -O3" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include "riscv_vector.h"
> +
> +void f (char*);
> +
> +/*
> +** stach_check_alloca_1:
> +** addi\tsp,sp,-48
> +** sw\tra,12\(sp\)
> +** sw\ts0,8\(sp\)
> +** addi\ts0,sp,16
> +** csrr\tt0,vlenb
> +** slli\tt1,t0,1
> +** sub\tsp,sp,t1
> +** ...
> +** addi\ta2,a2,23
> +** andi\ta2,a2,-16
> +** sub\tsp,sp,a2
> +** ...
> +** lw\tra,12\(sp\)
> +** lw\ts0,8\(sp\)
> +** addi\tsp,sp,48
> +** jr\tra
> +*/
> +void stach_check_alloca_1 (vuint8m1_t data, uint8_t *base, int y, ...)
> +{
> + vuint8m8_t v0, v8, v16, v24;
> + asm volatile ("nop"
> + : "=vr" (v0), "=vr" (v8), "=vr" (v16), "=vr" (v24)
> + :
> + :);
> + asm volatile ("nop"
> + :
> + : "vr" (v0), "vr" (v8), "vr" (v16), "vr" (v24)
> + :);
> + *(vuint8m1_t *)base = data;
> + char* pStr = (char*)__builtin_alloca(y);
> + f(pStr);
> +}
> --
> 2.36.1
>
This ICE is introduced by this patch:
https://gcc.gnu.org/pipermail/gcc-patches/2022-November/606523.html
PLCT lab is helping with this bug fix.
juzhe.zhong@rivai.ai
From: Andreas Schwab
Date: 2022-11-20 17:24
To: juzhe.zhong
CC: gcc-patches; monk.chiang; kito.cheng
Subject: Re: [PATCH] RISC-V: Add RVV registers register spilling
FAIL: gcc.target/riscv/rvv/base/spill-1.c (internal compiler error: in to_constant, at poly-int.h:504)
FAIL: gcc.target/riscv/rvv/base/spill-1.c (test for excess errors)
FAIL: gcc.target/riscv/rvv/base/spill-2.c (internal compiler error: in to_constant, at poly-int.h:504)
FAIL: gcc.target/riscv/rvv/base/spill-2.c (test for excess errors)
FAIL: gcc.target/riscv/rvv/base/spill-3.c (internal compiler error: in to_constant, at poly-int.h:504)
FAIL: gcc.target/riscv/rvv/base/spill-3.c (test for excess errors)
FAIL: gcc.target/riscv/rvv/base/spill-4.c (internal compiler error: in to_constant, at poly-int.h:504)
FAIL: gcc.target/riscv/rvv/base/spill-4.c (test for excess errors)
FAIL: gcc.target/riscv/rvv/base/spill-5.c (internal compiler error: in to_constant, at poly-int.h:504)
FAIL: gcc.target/riscv/rvv/base/spill-5.c (test for excess errors)
FAIL: gcc.target/riscv/rvv/base/spill-6.c (internal compiler error: in to_constant, at poly-int.h:504)
FAIL: gcc.target/riscv/rvv/base/spill-6.c (test for excess errors)
FAIL: gcc.target/riscv/rvv/base/spill-7.c (internal compiler error: in to_constant, at poly-int.h:504)
FAIL: gcc.target/riscv/rvv/base/spill-7.c (test for excess errors)
--
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510 2552 DF73 E780 A9DA AEC1
"And now for something completely different."
https://gcc.gnu.org/pipermail/gcc-patches/2022-November/606523.html
This patch obviously didn't handle a scalable-size frame.
So it ICEs in offset = cfun->machine->frame.gp_sp_offset.to_constant ();
We can't directly use to_constant if the frame size is scalable.
Please fix it or revert it. Thanks
juzhe.zhong@rivai.ai
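For readers unfamiliar with poly_int: the assertion at poly-int.h:504 fires because to_constant () is only valid when every runtime coefficient is zero. The sketch below is a minimal standalone model of that contract (illustrative types and names, not GCC's real classes), showing why an RVV-sized frame trips the assert and why an is_constant () guard avoids it:

// Minimal standalone model of poly_int64's to_constant () contract
// (illustrative only; not GCC's real classes).  A value a + b*X, where
// X is a runtime indeterminate (for RVV: VLENB / chunk-bytes - 1), is
// constant only when b == 0; converting a value with b != 0 asserts,
// which is exactly the reported ICE.
#include <cassert>
#include <cstdint>

struct poly64
{
  int64_t coeffs[2];                    // value == coeffs[0] + coeffs[1] * X
  bool is_constant () const { return coeffs[1] == 0; }
  int64_t to_constant () const { assert (is_constant ()); return coeffs[0]; }
};

int main ()
{
  poly64 scalar_offset = {16, 0};       // plain scalar frame slot: constant
  poly64 rvv_frame = {16, 16};          // 16 + 16*X bytes: scalable

  if (scalar_offset.is_constant ())     // guard first, as in the fix below
    assert (scalar_offset.to_constant () == 16);

  assert (!rvv_frame.is_constant ());   // rvv_frame.to_constant () would assert
  return 0;
}

Jeff's patch below applies exactly this guard in riscv_get_separate_components.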
On 11/21/22 02:25, juzhe.zhong@rivai.ai wrote:
> https://gcc.gnu.org/pipermail/gcc-patches/2022-November/606523.html
> This patch obviously didn't handle a scalable-size frame.
> So it ICEs in offset = cfun->machine->frame.gp_sp_offset.to_constant ();
> We can't directly use to_constant if the frame size is scalable.
> Please fix it or revert it. Thanks
We probably just need to reject everything in
riscv_get_separate_components if the offset isn't constant. Something
like the attached patch (untested) might be enough to resolve the problem.
Jeff
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 7ec4ce97e6c..7bfc0e9f595 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -5340,7 +5340,8 @@ riscv_get_separate_components (void)
bitmap_clear (components);
if (riscv_use_save_libcall (&cfun->machine->frame)
- || cfun->machine->interrupt_handler_p)
+ || cfun->machine->interrupt_handler_p
+ || !cfun->machine->frame.gp_sp_offset.is_constant ())
return components;
offset = cfun->machine->frame.gp_sp_offset.to_constant ();
> -----Original Message-----
> From: "Jeff Law" <jeffreyalaw@gmail.com>
> Sent: 2022-11-21 23:26:37 (Monday)
> To: "juzhe.zhong@rivai.ai" <juzhe.zhong@rivai.ai>, schwab <schwab@linux-m68k.org>
> CC: gcc-patches <gcc-patches@gcc.gnu.org>, "monk.chiang" <monk.chiang@sifive.com>, "kito.cheng" <kito.cheng@gmail.com>, jiawei <jiawei@iscas.ac.cn>
> Subject: Re: [PATCH] RISC-V: Add RVV registers register spilling
I tested this patch and it fixes that problem and works well,
thanks for your work!
Jiawei
@@ -106,28 +106,25 @@ const_vec_all_same_in_range_p (rtx x, HOST_WIDE_INT minval,
/* Emit an RVV unmask && vl mov from SRC to DEST. */
static void
-emit_pred_move (rtx dest, rtx src, rtx vl, machine_mode mask_mode)
+emit_pred_move (rtx dest, rtx src, machine_mode mask_mode)
{
insn_expander<7> e;
-
machine_mode mode = GET_MODE (dest);
- if (register_operand (src, mode) && register_operand (dest, mode))
- {
- emit_move_insn (dest, src);
- return;
- }
+ rtx vl = gen_reg_rtx (Pmode);
+ unsigned int sew = GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
+ ? 8
+ : GET_MODE_BITSIZE (GET_MODE_INNER (mode));
+
+ emit_insn (gen_vsetvl_no_side_effects (
+ Pmode, vl, gen_rtx_REG (Pmode, 0), gen_int_mode (sew, Pmode),
+ gen_int_mode ((unsigned int) mode, Pmode), const1_rtx, const1_rtx));
e.add_output_operand (dest, mode);
e.add_all_one_mask_operand (mask_mode);
- /* For load operation, we create undef operand.
- For store operation, we make it depend on the dest memory to
- avoid potential bugs. */
- if (MEM_P (src))
- e.add_vundef_operand (mode);
- else
- e.add_input_operand (dest, mode);
+ e.add_vundef_operand (mode);
e.add_input_operand (src, mode);
+
e.add_input_operand (vl, Pmode);
e.add_policy_operand (TAIL_AGNOSTIC, MASK_AGNOSTIC);
@@ -143,37 +140,25 @@ bool
legitimize_move (rtx dest, rtx src, machine_mode mask_mode)
{
machine_mode mode = GET_MODE (dest);
- /* For whole registers load/store or register-register move,
- we don't need to specially handle them, just let them go
- through "*mov<mode>" and then use the codegen directly. */
- if ((known_ge (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR)
- && (GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL))
- || (register_operand (src, mode) && register_operand (dest, mode)))
+ if (known_ge (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR)
+ && GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL)
{
/* Need to force register if mem <- !reg. */
if (MEM_P (dest) && !REG_P (src))
src = force_reg (mode, src);
+
return false;
}
-
- rtx vlmax = gen_reg_rtx (Pmode);
- unsigned int sew = GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
- ? 8
- : GET_MODE_BITSIZE (GET_MODE_INNER (mode));
- emit_insn (gen_vsetvl_no_side_effects (
- Pmode, vlmax, gen_rtx_REG (Pmode, 0), gen_int_mode (sew, Pmode),
- gen_int_mode ((unsigned int) mode, Pmode), const1_rtx, const1_rtx));
-
if (!register_operand (src, mode) && !register_operand (dest, mode))
{
rtx tmp = gen_reg_rtx (mode);
if (MEM_P (src))
- emit_pred_move (tmp, src, vlmax, mask_mode);
+ emit_pred_move (tmp, src, mask_mode);
else
emit_move_insn (tmp, src);
src = tmp;
}
- emit_pred_move (dest, src, vlmax, mask_mode);
+ emit_pred_move (dest, src, mask_mode);
return true;
}
@@ -1943,6 +1943,37 @@ riscv_legitimize_poly_move (machine_mode mode, rtx dest, rtx tmp, rtx src)
}
}
+/* Adjust the scalable vector frame for the prologue && epilogue. */
+
+static void
+riscv_v_adjust_scalable_frame (rtx target, poly_int64 offset, bool epilogue)
+{
+ rtx tmp = RISCV_PROLOGUE_TEMP (Pmode);
+ rtx adjust_size = RISCV_PROLOGUE_TEMP2 (Pmode);
+ rtx insn, dwarf, adjust_frame_rtx;
+
+ riscv_legitimize_poly_move (Pmode, adjust_size, tmp,
+ gen_int_mode (offset, Pmode));
+
+ if (epilogue)
+ insn = gen_add3_insn (target, target, adjust_size);
+ else
+ insn = gen_sub3_insn (target, target, adjust_size);
+
+ insn = emit_insn (insn);
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ adjust_frame_rtx
+ = gen_rtx_SET (target,
+ plus_constant (Pmode, target, epilogue ? offset : -offset));
+
+ dwarf = alloc_reg_note (REG_FRAME_RELATED_EXPR, copy_rtx (adjust_frame_rtx),
+ NULL_RTX);
+
+ REG_NOTES (insn) = dwarf;
+}
+
/* If (set DEST SRC) is not a valid move instruction, emit an equivalent
sequence that is valid. */
@@ -4824,21 +4855,29 @@ riscv_restore_reg (rtx reg, rtx mem)
static HOST_WIDE_INT
riscv_first_stack_step (struct riscv_frame_info *frame)
{
- if (SMALL_OPERAND (frame->total_size.to_constant()))
- return frame->total_size.to_constant();
+ HOST_WIDE_INT frame_total_constant_size;
+ if (!frame->total_size.is_constant ())
+ frame_total_constant_size
+ = riscv_stack_align (frame->total_size.coeffs[0])
+ - riscv_stack_align (frame->total_size.coeffs[1]);
+ else
+ frame_total_constant_size = frame->total_size.to_constant ();
+
+ if (SMALL_OPERAND (frame_total_constant_size))
+ return frame_total_constant_size;
HOST_WIDE_INT min_first_step =
RISCV_STACK_ALIGN ((frame->total_size - frame->fp_sp_offset).to_constant());
HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8;
- HOST_WIDE_INT min_second_step = frame->total_size.to_constant() - max_first_step;
+ HOST_WIDE_INT min_second_step = frame_total_constant_size - max_first_step;
gcc_assert (min_first_step <= max_first_step);
/* As an optimization, use the least-significant bits of the total frame
size, so that the second adjustment step is just LUI + ADD. */
if (!SMALL_OPERAND (min_second_step)
- && frame->total_size.to_constant() % IMM_REACH < IMM_REACH / 2
- && frame->total_size.to_constant() % IMM_REACH >= min_first_step)
- return frame->total_size.to_constant() % IMM_REACH;
+ && frame_total_constant_size % IMM_REACH < IMM_REACH / 2
+ && frame_total_constant_size % IMM_REACH >= min_first_step)
+ return frame_total_constant_size % IMM_REACH;
if (TARGET_RVC)
{
@@ -4911,12 +4950,12 @@ void
riscv_expand_prologue (void)
{
struct riscv_frame_info *frame = &cfun->machine->frame;
- HOST_WIDE_INT size = frame->total_size.to_constant ();
+ poly_int64 size = frame->total_size;
unsigned mask = frame->mask;
rtx insn;
if (flag_stack_usage_info)
- current_function_static_stack_size = size;
+ current_function_static_stack_size = constant_lower_bound (size);
if (cfun->machine->naked_p)
return;
@@ -4938,7 +4977,9 @@ riscv_expand_prologue (void)
/* Save the registers. */
if ((frame->mask | frame->fmask) != 0)
{
- HOST_WIDE_INT step1 = MIN (size, riscv_first_stack_step (frame));
+ HOST_WIDE_INT step1 = riscv_first_stack_step (frame);
+ if (size.is_constant ())
+ step1 = MIN (size.to_constant(), step1);
insn = gen_add3_insn (stack_pointer_rtx,
stack_pointer_rtx,
@@ -4961,23 +5002,40 @@ riscv_expand_prologue (void)
}
/* Allocate the rest of the frame. */
- if (size > 0)
+ if (known_gt (size, 0))
{
- if (SMALL_OPERAND (-size))
+ /* Two-step adjustment:
+ 1. scalable frame; 2. constant frame. */
+ poly_int64 scalable_frame (0, 0);
+ if (!size.is_constant ())
+ {
+ /* First for scalable frame. */
+ poly_int64 scalable_frame = size;
+ scalable_frame.coeffs[0] = size.coeffs[1];
+ riscv_v_adjust_scalable_frame (stack_pointer_rtx, scalable_frame, false);
+ size -= scalable_frame;
+ }
+
+ /* Second step for constant frame. */
+ HOST_WIDE_INT constant_frame = size.to_constant ();
+ if (constant_frame == 0)
+ return;
+
+ if (SMALL_OPERAND (-constant_frame))
{
insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
- GEN_INT (-size));
+ GEN_INT (-constant_frame));
RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
}
else
{
- riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), GEN_INT (-size));
+ riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), GEN_INT (-constant_frame));
emit_insn (gen_add3_insn (stack_pointer_rtx,
stack_pointer_rtx,
RISCV_PROLOGUE_TEMP (Pmode)));
/* Describe the effect of the previous instructions. */
- insn = plus_constant (Pmode, stack_pointer_rtx, -size);
+ insn = plus_constant (Pmode, stack_pointer_rtx, -constant_frame);
insn = gen_rtx_SET (stack_pointer_rtx, insn);
riscv_set_frame_expr (insn);
}
@@ -5020,7 +5078,7 @@ riscv_expand_epilogue (int style)
Start off by assuming that no registers need to be restored. */
struct riscv_frame_info *frame = &cfun->machine->frame;
unsigned mask = frame->mask;
- HOST_WIDE_INT step1 = frame->total_size.to_constant ();
+ poly_int64 step1 = frame->total_size;
HOST_WIDE_INT step2 = 0;
bool use_restore_libcall = ((style == NORMAL_RETURN)
&& riscv_use_save_libcall (frame));
@@ -5056,11 +5114,27 @@ riscv_expand_epilogue (int style)
riscv_emit_stack_tie ();
need_barrier_p = false;
- rtx adjust = GEN_INT (-frame->hard_frame_pointer_offset.to_constant ());
- if (!SMALL_OPERAND (INTVAL (adjust)))
+ poly_int64 adjust_offset = -frame->hard_frame_pointer_offset;
+ rtx adjust = NULL_RTX;
+
+ if (!adjust_offset.is_constant ())
{
- riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), adjust);
- adjust = RISCV_PROLOGUE_TEMP (Pmode);
+ rtx tmp1 = RISCV_PROLOGUE_TEMP (Pmode);
+ rtx tmp2 = RISCV_PROLOGUE_TEMP2 (Pmode);
+ riscv_legitimize_poly_move (Pmode, tmp1, tmp2,
+ gen_int_mode (adjust_offset, Pmode));
+ adjust = tmp1;
+ }
+ else
+ {
+ if (!SMALL_OPERAND (adjust_offset.to_constant ()))
+ {
+ riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode),
+ GEN_INT (adjust_offset.to_constant ()));
+ adjust = RISCV_PROLOGUE_TEMP (Pmode);
+ }
+ else
+ adjust = GEN_INT (adjust_offset.to_constant ());
}
insn = emit_insn (
@@ -5070,7 +5144,7 @@ riscv_expand_epilogue (int style)
rtx dwarf = NULL_RTX;
rtx cfa_adjust_value = gen_rtx_PLUS (
Pmode, hard_frame_pointer_rtx,
- GEN_INT (-frame->hard_frame_pointer_offset.to_constant ()));
+ gen_int_mode (-frame->hard_frame_pointer_offset, Pmode));
rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value);
dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf);
RTX_FRAME_RELATED_P (insn) = 1;
@@ -5092,10 +5166,20 @@ riscv_expand_epilogue (int style)
/* Emit a barrier to prevent loads from a deallocated stack. */
riscv_emit_stack_tie ();
need_barrier_p = false;
+
+ /* Restore the scalable frame which was allocated in the prologue. */
+ if (!step1.is_constant ())
+ {
+ poly_int64 scalable_frame = step1;
+ scalable_frame.coeffs[0] = step1.coeffs[1];
+ riscv_v_adjust_scalable_frame (stack_pointer_rtx, scalable_frame,
+ true);
+ step1 -= scalable_frame;
+ }
/* Get an rtx for STEP1 that we can add to BASE. */
- rtx adjust = GEN_INT (step1);
- if (!SMALL_OPERAND (step1))
+ rtx adjust = GEN_INT (step1.to_constant ());
+ if (!SMALL_OPERAND (step1.to_constant ()))
{
riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), adjust);
adjust = RISCV_PROLOGUE_TEMP (Pmode);
@@ -6463,6 +6547,22 @@ riscv_regmode_natural_size (machine_mode mode)
return UNITS_PER_WORD;
}
+/* Implement the TARGET_DWARF_POLY_INDETERMINATE_VALUE hook. */
+
+static unsigned int
+riscv_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor,
+ int *offset)
+{
+ /* Polynomial invariant 1 == (VLENB / riscv_bytes_per_vector_chunk) - 1.
+ 1. TARGET_MIN_VLEN == 32, polynomial invariant 1 == (VLENB / 4) - 1.
+ 2. TARGET_MIN_VLEN > 32, polynomial invariant 1 == (VLENB / 8) - 1.
+ */
+ gcc_assert (i == 1);
+ *factor = riscv_bytes_per_vector_chunk;
+ *offset = 1;
+ return RISCV_DWARF_VLENB;
+}
+
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -6684,6 +6784,9 @@ riscv_regmode_natural_size (machine_mode mode)
#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT riscv_vector_alignment
+#undef TARGET_DWARF_POLY_INDETERMINATE_VALUE
+#define TARGET_DWARF_POLY_INDETERMINATE_VALUE riscv_dwarf_poly_indeterminate_value
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-riscv.h"
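To make the poly_int arithmetic in riscv_expand_prologue and riscv_dwarf_poly_indeterminate_value concrete: with TARGET_MIN_VLEN > 32 the chunk is 8 bytes, so the indeterminate is X = VLENB / 8 - 1, and a frame with coeffs (a, b) splits into a scalable part b * (1 + X) == b * VLENB / 8 (materialized with csrr t0, vlenb as in the tests above) plus a constant remainder a - b. A hedged standalone check (plain C++; the helper name and numbers are made up for the demo):

// Standalone check of the two-step frame split (illustrative values;
// assumes TARGET_MIN_VLEN > 32, i.e. riscv_bytes_per_vector_chunk == 8).
#include <cassert>
#include <cstdint>
#include <initializer_list>

static int64_t
eval (int64_t a, int64_t b, int64_t vlenb)    // a + b*X with X = vlenb/8 - 1
{
  return a + b * (vlenb / 8 - 1);
}

int main ()
{
  int64_t a = 16, b = 16;                     // total_size == 16 + 16*X bytes

  for (int64_t vlenb : {8, 16, 32, 64})       // VLEN == 64, 128, 256, 512 bits
    {
      int64_t scalable = b * vlenb / 8;       // b*(1+X): the csrr/slli/sub part
      int64_t constant = a - b;               // coeffs[0] - coeffs[1]: the addi part
      assert (scalable + constant == eval (a, b, vlenb));
    }
  return 0;
}

The same identity is why riscv_first_stack_step above takes riscv_stack_align (coeffs[0]) - riscv_stack_align (coeffs[1]) as the constant portion of a scalable frame.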
@@ -392,6 +392,7 @@ ASM_MISA_SPEC
/* Define Dwarf for RVV. */
#define RISCV_DWARF_VL (4096 + 0xc20)
#define RISCV_DWARF_VTYPE (4096 + 0xc21)
+#define RISCV_DWARF_VLENB (4096 + 0xc22)
/* Register in which static-chain is passed to a function. */
#define STATIC_CHAIN_REGNUM (GP_TEMP_FIRST + 2)
@@ -405,6 +406,8 @@ ASM_MISA_SPEC
#define RISCV_PROLOGUE_TEMP_REGNUM (GP_TEMP_FIRST)
#define RISCV_PROLOGUE_TEMP(MODE) gen_rtx_REG (MODE, RISCV_PROLOGUE_TEMP_REGNUM)
+#define RISCV_PROLOGUE_TEMP2_REGNUM (GP_TEMP_FIRST + 1)
+#define RISCV_PROLOGUE_TEMP2(MODE) gen_rtx_REG (MODE, RISCV_PROLOGUE_TEMP2_REGNUM)
#define RISCV_CALL_ADDRESS_TEMP_REGNUM (GP_TEMP_FIRST + 1)
#define RISCV_CALL_ADDRESS_TEMP(MODE) \
@@ -34,6 +34,29 @@
(VNx8DF "TARGET_VECTOR_ELEN_FP_64")
])
+(define_mode_iterator V_WHOLE [
+ (VNx4QI "TARGET_MIN_VLEN == 32") VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32")
+ (VNx2HI "TARGET_MIN_VLEN == 32") VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32")
+ (VNx1SI "TARGET_MIN_VLEN == 32") VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32")
+ VNx1DI VNx2DI VNx4DI (VNx8DI "TARGET_MIN_VLEN > 32")
+ (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN == 32")
+ (VNx2SF "TARGET_VECTOR_ELEN_FP_32")
+ (VNx4SF "TARGET_VECTOR_ELEN_FP_32")
+ (VNx8SF "TARGET_VECTOR_ELEN_FP_32")
+ (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
+ (VNx1DF "TARGET_VECTOR_ELEN_FP_64")
+ (VNx2DF "TARGET_VECTOR_ELEN_FP_64")
+ (VNx4DF "TARGET_VECTOR_ELEN_FP_64")
+ (VNx8DF "TARGET_VECTOR_ELEN_FP_64")
+])
+
+(define_mode_iterator V_FRACT [
+ VNx1QI VNx2QI (VNx4QI "TARGET_MIN_VLEN > 32")
+ VNx1HI (VNx2HI "TARGET_MIN_VLEN > 32")
+ (VNx1SI "TARGET_MIN_VLEN > 32")
+ (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
+])
+
(define_mode_iterator VB [
VNx1BI VNx2BI VNx4BI VNx8BI VNx16BI VNx32BI
(VNx64BI "TARGET_MIN_VLEN > 32")
@@ -53,6 +53,36 @@
(match_operand:V 1 "vector_move_operand"))]
"TARGET_VECTOR"
{
+ /* For a whole register move, we transform the pattern into a form
+ that excludes the clobber of a scratch register.
+
+ We include a clobber of a scalar scratch register which is going to be
+ used to emit a vsetvl instruction after reload_completed, since we
+ need the vsetvl instruction to set the VL/VTYPE global status for
+ fractional vector load/store.
+
+ For example:
+ [(set (match_operand:VNx1QI v24)
+ (match_operand:VNx1QI (mem: a4)))
+ (clobber (scratch:SI a5))]
+ ====>> vsetvl a5,zero,e8,mf8
+ ====>> vle8.v v24,(a4)
+
+ Philosophy:
+
+ - Clobber a scalar scratch register for each mov<mode>.
+
+ - Classify the machine_mode mode = <MODE>mode into 2 classes:
+ whole register move and fractional register move.
+
+ - Transform and remove the scratch clobber register for whole
+ register moves so that we can avoid occupying the scalar
+ registers.
+
+ - We cannot leave it to TARGET_SECONDARY_RELOAD since that happens
+ before spilling. The clobber scratch is used when spilling fractional
+ registers in IRA/LRA, so it is too early. */
+
if (riscv_vector::legitimize_move (operands[0], operands[1], <VM>mode))
DONE;
})
@@ -61,12 +91,34 @@
;; Also applicable for all register moves.
;; Fractional vector modes load/store are not allowed to match this pattern.
;; Mask modes load/store are not allowed to match this pattern.
-(define_insn "*mov<mode>"
- [(set (match_operand:V 0 "reg_or_mem_operand" "=vr,m,vr")
- (match_operand:V 1 "reg_or_mem_operand" "m,vr,vr"))]
- "TARGET_VECTOR && ((register_operand (operands[0], <MODE>mode)
- && register_operand (operands[1], <MODE>mode))
- || known_ge (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR))"
+;; We separate "*mov<mode>" into "*mov<mode>_whole" and "*mov<mode>_fract" because
+;; we don't want to include fractional load/store in "*mov<mode>" which will
+;; create unexpected patterns in LRA.
+;; For example:
+;; ira rtl:
+;; (insn 20 19 9 2 (set (reg/v:VNx2QI 97 v1 [ v1 ])
+;; (reg:VNx2QI 134 [ _1 ])) "rvv.c":9:22 571 {*movvnx2qi_fract}
+;; (nil))
+;; When the value of pseudo register 134 in the insn above is discovered
+;; to be already spilled to memory during LRA,
+;; LRA will reload this pattern into a memory load instruction pattern.
+;; Because VNx2QI is a fractional vector, we want LRA to reload this pattern into
+;; (insn 20 19 9 2 (parallel [
+;; (set (reg:VNx2QI 98 v2 [orig:134 _1 ] [134])
+;; (mem/c:VNx2QI (reg:SI 13 a3 [155]) [1 %sfp+[-2, -2] S[2, 2] A8]))
+;; (clobber (reg:SI 14 a4 [149]))])
+;; That way we are able to emit a vsetvl instruction using the clobber scratch a4.
+;; To let LRA generate the expected pattern, we should exclude fractional vector
+;; load/store in "*mov<mode>_whole". Otherwise, it will reload this pattern into:
+;; (insn 20 19 9 2 (set (reg:VNx2QI 98 v2 [orig:134 _1 ] [134])
+;; (mem/c:VNx2QI (reg:SI 13 a3 [155]) [1 %sfp+[-2, -2] S[2, 2] A8])))
+;; which is not the pattern we want.
+;; According to the facts above, we make "*mov<mode>_whole" include load/store/move for whole
+;; vector modes according to '-march' and "*mov<mode>_fract" include only fractional vector modes.
+(define_insn "*mov<mode>_whole"
+ [(set (match_operand:V_WHOLE 0 "reg_or_mem_operand" "=vr, m,vr")
+ (match_operand:V_WHOLE 1 "reg_or_mem_operand" " m,vr,vr"))]
+ "TARGET_VECTOR"
"@
vl%m1re<sew>.v\t%0,%1
vs%m1r.v\t%1,%0
@@ -74,18 +126,26 @@
[(set_attr "type" "vldr,vstr,vmov")
(set_attr "mode" "<MODE>")])
+(define_insn "*mov<mode>_fract"
+ [(set (match_operand:V_FRACT 0 "register_operand" "=vr")
+ (match_operand:V_FRACT 1 "register_operand" " vr"))]
+ "TARGET_VECTOR"
+ "vmv1r.v\t%0,%1"
+ [(set_attr "type" "vmov")
+ (set_attr "mode" "<MODE>")])
+
(define_expand "mov<mode>"
[(set (match_operand:VB 0 "reg_or_mem_operand")
(match_operand:VB 1 "vector_move_operand"))]
"TARGET_VECTOR"
{
if (riscv_vector::legitimize_move (operands[0], operands[1], <MODE>mode))
- DONE;
+ DONE;
})
(define_insn "*mov<mode>"
[(set (match_operand:VB 0 "register_operand" "=vr")
- (match_operand:VB 1 "register_operand" "vr"))]
+ (match_operand:VB 1 "register_operand" " vr"))]
"TARGET_VECTOR"
"vmv1r.v\t%0,%1"
[(set_attr "type" "vmov")
@@ -290,18 +350,18 @@
;; (const_int:QI N)]), -15 <= N < 16.
;; 2. (const_vector:VNx1SF repeat [
;; (const_double:SF 0.0 [0x0.0p+0])]).
-(define_insn "@pred_mov<mode>"
+(define_insn_and_split "@pred_mov<mode>"
[(set (match_operand:V 0 "nonimmediate_operand" "=vd, vr, m, vr, vr")
- (if_then_else:V
- (unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1, vmWc1, vmWc1, Wc1")
- (match_operand 4 "vector_length_operand" " rK, rK, rK, rK, rK")
- (match_operand 5 "const_int_operand" " i, i, i, i, i")
- (match_operand 6 "const_int_operand" " i, i, i, i, i")
- (reg:SI VL_REGNUM)
- (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
- (match_operand:V 3 "vector_move_operand" " m, m, vr, vr, viWc0")
- (match_operand:V 2 "vector_merge_operand" " 0, vu, 0, vu0, vu0")))]
+ (if_then_else:V
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1, vmWc1, Wc1, Wc1")
+ (match_operand 4 "vector_length_operand" " rK, rK, rK, rK, rK")
+ (match_operand 5 "const_int_operand" " i, i, i, i, i")
+ (match_operand 6 "const_int_operand" " i, i, i, i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operand:V 3 "vector_move_operand" " m, m, vr, vr, viWc0")
+ (match_operand:V 2 "vector_merge_operand" " 0, vu, vu0, vu0, vu0")))]
"TARGET_VECTOR"
"@
vle<sew>.v\t%0,%3%p1
@@ -309,31 +369,41 @@
vse<sew>.v\t%3,%0%p1
vmv.v.v\t%0,%3
vmv.v.i\t%0,v%3"
+ "&& register_operand (operands[0], <MODE>mode)
+ && register_operand (operands[3], <MODE>mode)
+ && satisfies_constraint_vu (operands[2])"
+ [(set (match_dup 0) (match_dup 3))]
+ ""
[(set_attr "type" "vlde,vlde,vste,vimov,vimov")
(set_attr "mode" "<MODE>")])
;; vlm.v/vsm.v/vmclr.m/vmset.m.
;; constraint alternative 0 match vlm.v.
-;; constraint alternative 2 match vsm.v.
+;; constraint alternative 1 match vsm.v.
;; constraint alternative 3 match vmclr.m.
;; constraint alternative 4 match vmset.m.
-(define_insn "@pred_mov<mode>"
- [(set (match_operand:VB 0 "nonimmediate_operand" "=vr, m, vr, vr")
- (if_then_else:VB
- (unspec:VB
- [(match_operand:VB 1 "vector_mask_operand" "Wc1, Wc1, Wc1, Wc1")
- (match_operand 4 "vector_length_operand" " rK, rK, rK, rK")
- (match_operand 5 "const_int_operand" " i, i, i, i")
- (match_operand 6 "const_int_operand" " i, i, i, i")
- (reg:SI VL_REGNUM)
- (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
- (match_operand:VB 3 "vector_move_operand" " m, vr, Wc0, Wc1")
- (match_operand:VB 2 "vector_merge_operand" " vu, 0, vu, vu")))]
+(define_insn_and_split "@pred_mov<mode>"
+ [(set (match_operand:VB 0 "nonimmediate_operand" "=vr, m, vr, vr, vr")
+ (if_then_else:VB
+ (unspec:VB
+ [(match_operand:VB 1 "vector_mask_operand" "Wc1, Wc1, Wc1, Wc1, Wc1")
+ (match_operand 4 "vector_length_operand" " rK, rK, rK, rK, rK")
+ (match_operand 5 "const_int_operand" " i, i, i, i, i")
+ (match_operand 6 "const_int_operand" " i, i, i, i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operand:VB 3 "vector_move_operand" " m, vr, vr, Wc0, Wc1")
+ (match_operand:VB 2 "vector_merge_operand" " vu, vu0, vu, vu, vu")))]
"TARGET_VECTOR"
"@
vlm.v\t%0,%3
vsm.v\t%3,%0
+ #
vmclr.m\t%0
vmset.m\t%0"
- [(set_attr "type" "vldm,vstm,vmalu,vmalu")
+ "&& register_operand (operands[0], <MODE>mode)
+ && register_operand (operands[3], <MODE>mode)"
+ [(set (match_dup 0) (match_dup 3))]
+ ""
+ [(set_attr "type" "vldm,vstm,vimov,vmalu,vmalu")
(set_attr "mode" "<MODE>")])
new file mode 100644
@@ -0,0 +1,6 @@
+#define exhaust_vector_regs() \
+ asm volatile("#" :: \
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", \
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", \
+ "v26", "v27", "v28", "v29", "v30", "v31");
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3 -fno-schedule-insns -fno-schedule-insns2 " } */
/* { dg-final { check-function-bodies "**" "" } } */
#include <riscv_vector.h>
@@ -7,12 +7,12 @@
/* Test tieable of RVV types with same LMUL. */
/*
** mov1:
-** addi\t(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),1
** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au]
-** addi\t(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),2
** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+** addi\t(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),1
** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+** addi\t(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),2
** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
** ret
*/
@@ -28,10 +28,10 @@ void mov1 (int8_t *in, int8_t *out, int M)
/*
** mov2:
-** addi\t(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),1
** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au]
** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+** addi\t(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),1
** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
** ret
*/
new file mode 100644
@@ -0,0 +1,385 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "riscv_vector.h"
+#include "macro.h"
+
+/*
+** spill_1:
+** csrr\tt0,vlenb
+** sub\tsp,sp,t0
+** ...
+** csrr\ta2,vlenb
+** srli\ta2,a2,3
+** slli\ta3,a2,3
+** sub\ta3,a3,a2
+** add\ta3,a3,sp
+** vse8.v\tv24,0\(a3\)
+** ...
+** csrr\ta2,vlenb
+** srli\ta2,a2,3
+** slli\ta3,a2,3
+** sub\ta3,a3,a2
+** add\ta3,a3,sp
+** vle8.v\tv24,0\(a3\)
+** vse8.v\tv24,0\(a1\)
+** csrr\tt0,vlenb
+** add\tsp,sp,t0
+** ...
+** jr\tra
+*/
+void
+spill_1 (int8_t *in, int8_t *out)
+{
+ vint8mf8_t v1 = *(vint8mf8_t*)in;
+ exhaust_vector_regs ();
+ *(vint8mf8_t*)out = v1;
+}
+
+/*
+** spill_2:
+** csrr\tt0,vlenb
+** sub\tsp,sp,t0
+** vsetvli\ta5,zero,e8,mf4,ta,ma
+** vle8.v\tv24,0\(a0\)
+** csrr\ta2,vlenb
+** srli\ta2,a2,2
+** slli\ta3,a2,2
+** sub\ta3,a3,a2
+** add\ta3,a3,sp
+** vse8.v\tv24,0\(a3\)
+** ...
+** csrr\ta2,vlenb
+** srli\ta2,a2,2
+** slli\ta3,a2,2
+** sub\ta3,a3,a2
+** add\ta3,a3,sp
+** vle8.v\tv24,0\(a3\)
+** vse8.v\tv24,0\(a1\)
+** csrr\tt0,vlenb
+** add\tsp,sp,t0
+** ...
+** jr\tra
+*/
+void
+spill_2 (int8_t *in, int8_t *out)
+{
+ vint8mf4_t v1 = *(vint8mf4_t*)in;
+ exhaust_vector_regs ();
+ *(vint8mf4_t*)out = v1;
+}
+
+/*
+** spill_3:
+** csrr\tt0,vlenb
+** sub\tsp,sp,t0
+** vsetvli\ta5,zero,e8,mf2,ta,ma
+** vle8.v\tv24,0\(a0\)
+** csrr\ta3,vlenb
+** srli\ta3,a3,1
+** add\ta3,a3,sp
+** vse8.v\tv24,0\(a3\)
+** ...
+** csrr\ta3,vlenb
+** srli\ta3,a3,1
+** add\ta3,a3,sp
+** vle8.v\tv24,0\(a3\)
+** vse8.v\tv24,0\(a1\)
+** csrr\tt0,vlenb
+** add\tsp,sp,t0
+** ...
+** jr\tra
+*/
+void
+spill_3 (int8_t *in, int8_t *out)
+{
+ vint8mf2_t v1 = *(vint8mf2_t*)in;
+ exhaust_vector_regs ();
+ *(vint8mf2_t*)out = v1;
+}
+
+/*
+** spill_4:
+** csrr\tt0,vlenb
+** sub\tsp,sp,t0
+** ...
+** vs1r.v\tv24,0\(sp\)
+** ...
+** vl1re8.v\tv2,0\(sp\)
+** vs1r.v\tv2,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_4 (int8_t *in, int8_t *out)
+{
+ register vint8m1_t v1 asm("v1") = *(vint8m1_t*)in;
+ asm volatile ("# %0"::"vr"(v1));
+ exhaust_vector_regs ();
+ register vint8m1_t v2 asm("v2") = v1;
+ *(vint8m1_t*)out = v2;
+ asm volatile ("# %0"::"vr"(v2));
+}
+
+/*
+** spill_5:
+** csrr\tt0,vlenb
+** slli\tt1,t0,1
+** sub\tsp,sp,t1
+** ...
+** vs2r.v\tv24,0\(sp\)
+** ...
+** vl2re8.v\tv4,0\(sp\)
+** vs2r.v\tv4,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_5 (int8_t *in, int8_t *out)
+{
+ register vint8m2_t v2 asm("v2") = *(vint8m2_t*)in;
+ asm volatile ("# %0"::"vr"(v2));
+ exhaust_vector_regs ();
+ register vint8m2_t v4 asm("v4") = v2;
+ *(vint8m2_t*)out = v4;
+ asm volatile ("# %0"::"vr"(v4));
+}
+
+/*
+** spill_6:
+** csrr\tt0,vlenb
+** slli\tt1,t0,2
+** sub\tsp,sp,t1
+** ...
+** vs4r.v\tv24,0\(sp\)
+** ...
+** vl4re8.v\tv8,0\(sp\)
+** vs4r.v\tv8,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_6 (int8_t *in, int8_t *out)
+{
+ register vint8m4_t v4 asm("v4") = *(vint8m4_t*)in;
+ asm volatile ("# %0"::"vr"(v4));
+ exhaust_vector_regs ();
+ register vint8m4_t v8 asm("v8") = v4;
+ *(vint8m4_t*)out = v8;
+ asm volatile ("# %0"::"vr"(v8));
+}
+
+/*
+** spill_7:
+** csrr\tt0,vlenb
+** slli\tt1,t0,3
+** sub\tsp,sp,t1
+** ...
+** vs8r.v\tv24,0\(sp\)
+** ...
+** vl8re8.v\tv16,0\(sp\)
+** vs8r.v\tv16,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_7 (int8_t *in, int8_t *out)
+{
+ register vint8m8_t v8 asm("v8") = *(vint8m8_t*)in;
+ asm volatile ("# %0"::"vr"(v8));
+ exhaust_vector_regs ();
+ register vint8m8_t v16 asm("v16") = v8;
+ *(vint8m8_t*)out = v16;
+ asm volatile ("# %0"::"vr"(v16));
+}
+
+/*
+** spill_8:
+** csrr\tt0,vlenb
+** sub\tsp,sp,t0
+** vsetvli\ta5,zero,e8,mf8,ta,ma
+** vle8.v\tv24,0\(a0\)
+** csrr\ta2,vlenb
+** srli\ta2,a2,3
+** slli\ta3,a2,3
+** sub\ta3,a3,a2
+** add\ta3,a3,sp
+** vse8.v\tv24,0\(a3\)
+** ...
+** csrr\ta2,vlenb
+** srli\ta2,a2,3
+** slli\ta3,a2,3
+** sub\ta3,a3,a2
+** add\ta3,a3,sp
+** vle8.v\tv24,0\(a3\)
+** vse8.v\tv24,0\(a1\)
+** csrr\tt0,vlenb
+** add\tsp,sp,t0
+** ...
+** jr\tra
+*/
+void
+spill_8 (uint8_t *in, uint8_t *out)
+{
+ vuint8mf8_t v1 = *(vuint8mf8_t*)in;
+ exhaust_vector_regs ();
+ *(vuint8mf8_t*)out = v1;
+}
+
+/*
+** spill_9:
+** csrr\tt0,vlenb
+** sub\tsp,sp,t0
+** vsetvli\ta5,zero,e8,mf4,ta,ma
+** vle8.v\tv24,0\(a0\)
+** csrr\ta2,vlenb
+** srli\ta2,a2,2
+** slli\ta3,a2,2
+** sub\ta3,a3,a2
+** add\ta3,a3,sp
+** vse8.v\tv24,0\(a3\)
+** ...
+** csrr\ta2,vlenb
+** srli\ta2,a2,2
+** slli\ta3,a2,2
+** sub\ta3,a3,a2
+** add\ta3,a3,sp
+** vle8.v\tv24,0\(a3\)
+** vse8.v\tv24,0\(a1\)
+** csrr\tt0,vlenb
+** add\tsp,sp,t0
+** ...
+** jr\tra
+*/
+void
+spill_9 (uint8_t *in, uint8_t *out)
+{
+ vuint8mf4_t v1 = *(vuint8mf4_t*)in;
+ exhaust_vector_regs ();
+ *(vuint8mf4_t*)out = v1;
+}
+
+/*
+** spill_10:
+** csrr\tt0,vlenb
+** sub\tsp,sp,t0
+** vsetvli\ta5,zero,e8,mf2,ta,ma
+** vle8.v\tv24,0\(a0\)
+** csrr\ta3,vlenb
+** srli\ta3,a3,1
+** add\ta3,a3,sp
+** vse8.v\tv24,0\(a3\)
+** ...
+** csrr\ta3,vlenb
+** srli\ta3,a3,1
+** add\ta3,a3,sp
+** vle8.v\tv24,0\(a3\)
+** vse8.v\tv24,0\(a1\)
+** csrr\tt0,vlenb
+** add\tsp,sp,t0
+** ...
+** jr\tra
+*/
+void
+spill_10 (uint8_t *in, uint8_t *out)
+{
+ vuint8mf2_t v1 = *(vuint8mf2_t*)in;
+ exhaust_vector_regs ();
+ *(vuint8mf2_t*)out = v1;
+}
+
+/*
+** spill_11:
+** csrr\tt0,vlenb
+** sub\tsp,sp,t0
+** ...
+** vs1r.v\tv24,0\(sp\)
+** ...
+** vl1re8.v\tv2,0\(sp\)
+** vs1r.v\tv2,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_11 (uint8_t *in, uint8_t *out)
+{
+ register vuint8m1_t v1 asm("v1") = *(vuint8m1_t*)in;
+ asm volatile ("# %0"::"vr"(v1));
+ exhaust_vector_regs ();
+ register vuint8m1_t v2 asm("v2") = v1;
+ *(vuint8m1_t*)out = v2;
+ asm volatile ("# %0"::"vr"(v2));
+}
+
+/*
+** spill_12:
+** csrr\tt0,vlenb
+** slli\tt1,t0,1
+** sub\tsp,sp,t1
+** ...
+** vs2r.v\tv24,0\(sp\)
+** ...
+** vl2re8.v\tv4,0\(sp\)
+** vs2r.v\tv4,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_12 (uint8_t *in, uint8_t *out)
+{
+ register vuint8m2_t v2 asm("v2") = *(vuint8m2_t*)in;
+ asm volatile ("# %0"::"vr"(v2));
+ exhaust_vector_regs ();
+ register vuint8m2_t v4 asm("v4") = v2;
+ *(vuint8m2_t*)out = v4;
+ asm volatile ("# %0"::"vr"(v4));
+}
+
+/*
+** spill_13:
+** csrr\tt0,vlenb
+** slli\tt1,t0,2
+** sub\tsp,sp,t1
+** ...
+** vs4r.v\tv24,0\(sp\)
+** ...
+** vl4re8.v\tv8,0\(sp\)
+** vs4r.v\tv8,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_13 (uint8_t *in, uint8_t *out)
+{
+ register vuint8m4_t v4 asm("v4") = *(vuint8m4_t*)in;
+ asm volatile ("# %0"::"vr"(v4));
+ exhaust_vector_regs ();
+ register vuint8m4_t v8 asm("v8") = v4;
+ *(vuint8m4_t*)out = v8;
+ asm volatile ("# %0"::"vr"(v8));
+}
+
+/*
+** spill_14:
+** csrr\tt0,vlenb
+** slli\tt1,t0,3
+** sub\tsp,sp,t1
+** ...
+** vs8r.v\tv24,0\(sp\)
+** ...
+** vl8re8.v\tv16,0\(sp\)
+** vs8r.v\tv16,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_14 (uint8_t *in, uint8_t *out)
+{
+ register vuint8m8_t v8 asm("v8") = *(vuint8m8_t*)in;
+ asm volatile ("# %0"::"vr"(v8));
+ exhaust_vector_regs ();
+ register vuint8m8_t v16 asm("v16") = v8;
+ *(vuint8m8_t*)out = v16;
+ asm volatile ("# %0"::"vr"(v16));
+}
new file mode 100644
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -fno-schedule-insns -fno-schedule-insns2 -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "riscv_vector.h"
+
+void f (char*);
+
+/*
+** stach_check_alloca_1:
+** addi\tsp,sp,-32
+** sw\tra,4\(sp\)
+** sw\ts0,0\(sp\)
+** addi\ts0,sp,8
+** csrr\tt0,vlenb
+** sub\tsp,sp,t0
+** ...
+** addi\ta2,a2,15
+** andi\ta2,a2,-8
+** sub\tsp,sp,a2
+** ...
+** lw\tra,4\(sp\)
+** lw\ts0,0\(sp\)
+** addi\tsp,sp,32
+** jr\tra
+*/
+void stach_check_alloca_1 (vuint8m1_t data, uint8_t *base, int y, ...)
+{
+ vuint8m8_t v0, v8, v16, v24;
+ asm volatile ("nop"
+ : "=vr" (v0), "=vr" (v8), "=vr" (v16), "=vr" (v24)
+ :
+ :);
+ asm volatile ("nop"
+ :
+ : "vr" (v0), "vr" (v8), "vr" (v16), "vr" (v24)
+ :);
+ *(vuint8m1_t *)base = data;
+ char* pStr = (char*)__builtin_alloca(y);
+ f(pStr);
+}
new file mode 100644
@@ -0,0 +1,60 @@
+/* { dg-do compile } */
+/* { dg-options "-msave-restore -march=rv32gcv -mabi=ilp32 -msave-restore -fno-schedule-insns -fno-schedule-insns2 -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+void fn2 (float a1, float a2, float a3, float a4,
+ float a5, float a6, float a7, float a8);
+void fn3 (char*);
+
+/*
+** stack_save_restore_2:
+** call\tt0,__riscv_save_2
+** csrr\tt0,vlenb
+** slli\tt1,t0,1
+** sub\tsp,sp,t1
+** li\tt0,-8192
+** addi\tt0,t0,192
+** add\tsp,sp,t0
+** ...
+** csrr\tt0,vlenb
+** slli\tt1,t0,1
+** add\tsp,sp,t1
+** li\tt0,8192
+** addi\tt0,t0,-208
+** add\tsp,sp,t0
+** addi\tsp,sp,16
+** tail\t__riscv_restore_2
+*/
+int stack_save_restore_2 (float a1, float a2, float a3, float a4,
+ float a5, float a6, float a7, float a8,
+ vuint8m1_t data, uint8_t *base)
+{
+ char d[8000];
+ float f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13;
+ asm volatile ("nop"
+ : "=f" (f1), "=f" (f2), "=f" (f3), "=f" (f4), "=f" (f5), "=f" (f6),
+ "=f" (f7), "=f" (f8), "=f" (f9), "=f" (f10), "=f" (f11),
+ "=f" (f12), "=f" (f13)
+ :
+ :);
+ asm volatile ("nop"
+ :
+ : "f" (f1), "f" (f2), "f" (f3), "f" (f4), "f" (f5), "f" (f6),
+ "f" (f7), "f" (f8), "f" (f9), "f" (f10), "f" (f11),
+ "f" (f12), "f" (f13)
+ :);
+ vuint8m8_t v0, v8, v16, v24;
+ asm volatile ("nop"
+ : "=vr" (v0), "=vr" (v8), "=vr" (v16), "=vr" (v24)
+ :
+ :);
+ asm volatile ("nop"
+ :
+ : "vr" (v0), "vr" (v8), "vr" (v16), "vr" (v24)
+ :);
+ *(vuint8m1_t *)base = data;
+ fn2 (a1, a2, a3, a4, a5, a6, a7, a8);
+ fn3(d);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -msave-restore -fno-schedule-insns -fno-schedule-insns2 -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+void fn2 (float a1, float a2, float a3, float a4,
+ float a5, float a6, float a7, float a8);
+void fn3 (char*);
+
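+/* Scalar baseline for spill-11.c: no vector types at all, so the
+   save-restore frame must contain no vlenb-based adjustment, only the
+   outlined __riscv_save_0/__riscv_restore_0 calls and the li/addi
+   sequences that build the 8000-byte step.  */
+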
+/*
+** stack_save_restore_1:
+** call\tt0,__riscv_save_0
+** li\tt0,-8192
+** addi\tt0,t0,192
+** add\tsp,sp,t0
+** ...
+** li\ta0,-8192
+** addi\ta0,a0,192
+** li\ta5,8192
+** addi\ta5,a5,-192
+** add\ta5,a5,a0
+** add\ta0,a5,sp
+** ...
+** tail\t__riscv_restore_0
+*/
+int stack_save_restore_1 (float a1, float a2, float a3, float a4,
+ float a5, float a6, float a7, float a8)
+{
+ char d[8000];
+ float f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13;
+ asm volatile ("nop"
+ : "=f" (f1), "=f" (f2), "=f" (f3), "=f" (f4), "=f" (f5), "=f" (f6),
+ "=f" (f7), "=f" (f8), "=f" (f9), "=f" (f10), "=f" (f11),
+ "=f" (f12), "=f" (f13)
+ :
+ :);
+ asm volatile ("nop"
+ :
+ : "f" (f1), "f" (f2), "f" (f3), "f" (f4), "f" (f5), "f" (f6),
+ "f" (f7), "f" (f8), "f" (f9), "f" (f10), "f" (f11),
+ "f" (f12), "f" (f13)
+ :);
+ fn2 (a1, a2, a3, a4, a5, a6, a7, a8);
+ fn3(d);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,320 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "riscv_vector.h"
+#include "macro.h"
+
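+/* Fractional-LMUL values have no whole-register spill instruction, so
+   their slot address is formed from vlenb explicitly: for e16/mf4 the
+   offset is vlenb - vlenb/4 (srli by 2, slli by 2, sub), i.e. the slot
+   sits three quarters of a vlenb into the scalable area, and the access
+   is an ordinary vle16.v/vse16.v under the active vsetvli.  From m1
+   upwards the tests expect the whole-register vs<N>r.v/vl<N>re16.v forms.  */
+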
+/*
+** spill_2:
+** csrr\tt0,vlenb
+** sub\tsp,sp,t0
+** vsetvli\ta5,zero,e16,mf4,ta,ma
+** vle16.v\tv24,0\(a0\)
+** csrr\ta2,vlenb
+** srli\ta2,a2,2
+** slli\ta3,a2,2
+** sub\ta3,a3,a2
+** add\ta3,a3,sp
+** vse16.v\tv24,0\(a3\)
+** ...
+** csrr\ta2,vlenb
+** srli\ta2,a2,2
+** slli\ta3,a2,2
+** sub\ta3,a3,a2
+** add\ta3,a3,sp
+** vle16.v\tv24,0\(a3\)
+** vse16.v\tv24,0\(a1\)
+** csrr\tt0,vlenb
+** add\tsp,sp,t0
+** ...
+** jr\tra
+*/
+void
+spill_2 (int16_t *in, int16_t *out)
+{
+ vint16mf4_t v1 = *(vint16mf4_t*)in;
+ exhaust_vector_regs ();
+ *(vint16mf4_t*)out = v1;
+}
+
+/*
+** spill_3:
+** csrr\tt0,vlenb
+** sub\tsp,sp,t0
+** vsetvli\ta5,zero,e16,mf2,ta,ma
+** vle16.v\tv24,0\(a0\)
+** csrr\ta3,vlenb
+** srli\ta3,a3,1
+** add\ta3,a3,sp
+** vse16.v\tv24,0\(a3\)
+** ...
+** csrr\ta3,vlenb
+** srli\ta3,a3,1
+** add\ta3,a3,sp
+** vle16.v\tv24,0\(a3\)
+** vse16.v\tv24,0\(a1\)
+** csrr\tt0,vlenb
+** add\tsp,sp,t0
+** ...
+** jr\tra
+*/
+void
+spill_3 (int16_t *in, int16_t *out)
+{
+ vint16mf2_t v1 = *(vint16mf2_t*)in;
+ exhaust_vector_regs ();
+ *(vint16mf2_t*)out = v1;
+}
+
+/*
+** spill_4:
+** csrr\tt0,vlenb
+** sub\tsp,sp,t0
+** ...
+** vs1r.v\tv24,0\(sp\)
+** ...
+** vl1re16.v\tv2,0\(sp\)
+** vs1r.v\tv2,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_4 (int16_t *in, int16_t *out)
+{
+ register vint16m1_t v1 asm("v1") = *(vint16m1_t*)in;
+ asm volatile ("# %0"::"vr"(v1));
+ exhaust_vector_regs ();
+ register vint16m1_t v2 asm("v2") = v1;
+ *(vint16m1_t*)out = v2;
+ asm volatile ("# %0"::"vr"(v2));
+}
+
+/*
+** spill_5:
+** csrr\tt0,vlenb
+** slli\tt1,t0,1
+** sub\tsp,sp,t1
+** ...
+** vs2r.v\tv24,0\(sp\)
+** ...
+** vl2re16.v\tv4,0\(sp\)
+** vs2r.v\tv4,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_5 (int16_t *in, int16_t *out)
+{
+ register vint16m2_t v2 asm("v2") = *(vint16m2_t*)in;
+ asm volatile ("# %0"::"vr"(v2));
+ exhaust_vector_regs ();
+ register vint16m2_t v4 asm("v4") = v2;
+ *(vint16m2_t*)out = v4;
+ asm volatile ("# %0"::"vr"(v4));
+}
+
+/*
+** spill_6:
+** csrr\tt0,vlenb
+** slli\tt1,t0,2
+** sub\tsp,sp,t1
+** ...
+** vs4r.v\tv24,0\(sp\)
+** ...
+** vl4re16.v\tv8,0\(sp\)
+** vs4r.v\tv8,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_6 (int16_t *in, int16_t *out)
+{
+ register vint16m4_t v4 asm("v4") = *(vint16m4_t*)in;
+ asm volatile ("# %0"::"vr"(v4));
+ exhaust_vector_regs ();
+ register vint16m4_t v8 asm("v8") = v4;
+ *(vint16m4_t*)out = v8;
+ asm volatile ("# %0"::"vr"(v8));
+}
+
+/*
+** spill_7:
+** csrr\tt0,vlenb
+** slli\tt1,t0,3
+** sub\tsp,sp,t1
+** ...
+** vs8r.v\tv24,0\(sp\)
+** ...
+** vl8re16.v\tv16,0\(sp\)
+** vs8r.v\tv16,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_7 (int16_t *in, int16_t *out)
+{
+ register vint16m8_t v8 asm("v8") = *(vint16m8_t*)in;
+ asm volatile ("# %0"::"vr"(v8));
+ exhaust_vector_regs ();
+ register vint16m8_t v16 asm("v16") = v8;
+ *(vint16m8_t*)out = v16;
+ asm volatile ("# %0"::"vr"(v16));
+}
+
+/*
+** spill_9:
+** csrr\tt0,vlenb
+** sub\tsp,sp,t0
+** vsetvli\ta5,zero,e16,mf4,ta,ma
+** vle16.v\tv24,0\(a0\)
+** csrr\ta2,vlenb
+** srli\ta2,a2,2
+** slli\ta3,a2,2
+** sub\ta3,a3,a2
+** add\ta3,a3,sp
+** vse16.v\tv24,0\(a3\)
+** ...
+** csrr\ta2,vlenb
+** srli\ta2,a2,2
+** slli\ta3,a2,2
+** sub\ta3,a3,a2
+** add\ta3,a3,sp
+** vle16.v\tv24,0\(a3\)
+** vse16.v\tv24,0\(a1\)
+** csrr\tt0,vlenb
+** add\tsp,sp,t0
+** ...
+** jr\tra
+*/
+void
+spill_9 (uint16_t *in, uint16_t *out)
+{
+ vuint16mf4_t v1 = *(vuint16mf4_t*)in;
+ exhaust_vector_regs ();
+ *(vuint16mf4_t*)out = v1;
+}
+
+/*
+** spill_10:
+** csrr\tt0,vlenb
+** sub\tsp,sp,t0
+** vsetvli\ta5,zero,e16,mf2,ta,ma
+** vle16.v\tv24,0\(a0\)
+** csrr\ta3,vlenb
+** srli\ta3,a3,1
+** add\ta3,a3,sp
+** vse16.v\tv24,0\(a3\)
+** ...
+** csrr\ta3,vlenb
+** srli\ta3,a3,1
+** add\ta3,a3,sp
+** vle16.v\tv24,0\(a3\)
+** vse16.v\tv24,0\(a1\)
+** csrr\tt0,vlenb
+** add\tsp,sp,t0
+** ...
+** jr\tra
+*/
+void
+spill_10 (uint16_t *in, uint16_t *out)
+{
+ vuint16mf2_t v1 = *(vuint16mf2_t*)in;
+ exhaust_vector_regs ();
+ *(vuint16mf2_t*)out = v1;
+}
+
+/*
+** spill_11:
+** csrr\tt0,vlenb
+** sub\tsp,sp,t0
+** ...
+** vs1r.v\tv24,0\(sp\)
+** ...
+** vl1re16.v\tv2,0\(sp\)
+** vs1r.v\tv2,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_11 (uint16_t *in, uint16_t *out)
+{
+ register vuint16m1_t v1 asm("v1") = *(vuint16m1_t*)in;
+ asm volatile ("# %0"::"vr"(v1));
+ exhaust_vector_regs ();
+ register vuint16m1_t v2 asm("v2") = v1;
+ *(vuint16m1_t*)out = v2;
+ asm volatile ("# %0"::"vr"(v2));
+}
+
+/*
+** spill_12:
+** csrr\tt0,vlenb
+** slli\tt1,t0,1
+** sub\tsp,sp,t1
+** ...
+** vs2r.v\tv24,0\(sp\)
+** ...
+** vl2re16.v\tv4,0\(sp\)
+** vs2r.v\tv4,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_12 (uint16_t *in, uint16_t *out)
+{
+ register vuint16m2_t v2 asm("v2") = *(vuint16m2_t*)in;
+ asm volatile ("# %0"::"vr"(v2));
+ exhaust_vector_regs ();
+ register vuint16m2_t v4 asm("v4") = v2;
+ *(vuint16m2_t*)out = v4;
+ asm volatile ("# %0"::"vr"(v4));
+}
+
+/*
+** spill_13:
+** csrr\tt0,vlenb
+** slli\tt1,t0,2
+** sub\tsp,sp,t1
+** ...
+** vs4r.v\tv24,0\(sp\)
+** ...
+** vl4re16.v\tv8,0\(sp\)
+** vs4r.v\tv8,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_13 (uint16_t *in, uint16_t *out)
+{
+ register vuint16m4_t v4 asm("v4") = *(vuint16m4_t*)in;
+ asm volatile ("# %0"::"vr"(v4));
+ exhaust_vector_regs ();
+ register vuint16m4_t v8 asm("v8") = v4;
+ *(vuint16m4_t*)out = v8;
+ asm volatile ("# %0"::"vr"(v8));
+}
+
+/*
+** spill_14:
+** csrr\tt0,vlenb
+** slli\tt1,t0,3
+** sub\tsp,sp,t1
+** ...
+** vs8r.v\tv24,0\(sp\)
+** ...
+** vl8re16.v\tv16,0\(sp\)
+** vs8r.v\tv16,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_14 (uint16_t *in, uint16_t *out)
+{
+ register vuint16m8_t v8 asm("v8") = *(vuint16m8_t*)in;
+ asm volatile ("# %0"::"vr"(v8));
+ exhaust_vector_regs ();
+ register vuint16m8_t v16 asm("v16") = v8;
+ *(vuint16m8_t*)out = v16;
+ asm volatile ("# %0"::"vr"(v16));
+}
new file mode 100644
@@ -0,0 +1,254 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "riscv_vector.h"
+#include "macro.h"
+
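+/* e32 variant of spill-2.c: the single fractional case (mf2) goes through
+   vle32.v/vse32.v at an offset of vlenb/2 (srli a3,a3,1), while m1..m8
+   use whole-register vl<N>re32.v reloads with the frame sized by shifting
+   vlenb.  */
+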
+/*
+** spill_3:
+** csrr\tt0,vlenb
+** sub\tsp,sp,t0
+** vsetvli\ta5,zero,e32,mf2,ta,ma
+** vle32.v\tv24,0\(a0\)
+** csrr\ta3,vlenb
+** srli\ta3,a3,1
+** add\ta3,a3,sp
+** vse32.v\tv24,0\(a3\)
+** ...
+** csrr\ta3,vlenb
+** srli\ta3,a3,1
+** add\ta3,a3,sp
+** vle32.v\tv24,0\(a3\)
+** vse32.v\tv24,0\(a1\)
+** csrr\tt0,vlenb
+** add\tsp,sp,t0
+** ...
+** jr\tra
+*/
+void
+spill_3 (int32_t *in, int32_t *out)
+{
+ vint32mf2_t v1 = *(vint32mf2_t*)in;
+ exhaust_vector_regs ();
+ *(vint32mf2_t*)out = v1;
+}
+
+/*
+** spill_4:
+** csrr\tt0,vlenb
+** sub\tsp,sp,t0
+** ...
+** vs1r.v\tv24,0\(sp\)
+** ...
+** vl1re32.v\tv2,0\(sp\)
+** vs1r.v\tv2,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_4 (int32_t *in, int32_t *out)
+{
+ register vint32m1_t v1 asm("v1") = *(vint32m1_t*)in;
+ asm volatile ("# %0"::"vr"(v1));
+ exhaust_vector_regs ();
+ register vint32m1_t v2 asm("v2") = v1;
+ *(vint32m1_t*)out = v2;
+ asm volatile ("# %0"::"vr"(v2));
+}
+
+/*
+** spill_5:
+** csrr\tt0,vlenb
+** slli\tt1,t0,1
+** sub\tsp,sp,t1
+** ...
+** vs2r.v\tv24,0\(sp\)
+** ...
+** vl2re32.v\tv4,0\(sp\)
+** vs2r.v\tv4,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_5 (int32_t *in, int32_t *out)
+{
+ register vint32m2_t v2 asm("v2") = *(vint32m2_t*)in;
+ asm volatile ("# %0"::"vr"(v2));
+ exhaust_vector_regs ();
+ register vint32m2_t v4 asm("v4") = v2;
+ *(vint32m2_t*)out = v4;
+ asm volatile ("# %0"::"vr"(v4));
+}
+
+/*
+** spill_6:
+** csrr\tt0,vlenb
+** slli\tt1,t0,2
+** sub\tsp,sp,t1
+** ...
+** vs4r.v\tv24,0\(sp\)
+** ...
+** vl4re32.v\tv8,0\(sp\)
+** vs4r.v\tv8,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_6 (int32_t *in, int32_t *out)
+{
+ register vint32m4_t v4 asm("v4") = *(vint32m4_t*)in;
+ asm volatile ("# %0"::"vr"(v4));
+ exhaust_vector_regs ();
+ register vint32m4_t v8 asm("v8") = v4;
+ *(vint32m4_t*)out = v8;
+ asm volatile ("# %0"::"vr"(v8));
+}
+
+/*
+** spill_7:
+** csrr\tt0,vlenb
+** slli\tt1,t0,3
+** sub\tsp,sp,t1
+** ...
+** vs8r.v\tv24,0\(sp\)
+** ...
+** vl8re32.v\tv16,0\(sp\)
+** vs8r.v\tv16,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_7 (int32_t *in, int32_t *out)
+{
+ register vint32m8_t v8 asm("v8") = *(vint32m8_t*)in;
+ asm volatile ("# %0"::"vr"(v8));
+ exhaust_vector_regs ();
+ register vint32m8_t v16 asm("v16") = v8;
+ *(vint32m8_t*)out = v16;
+ asm volatile ("# %0"::"vr"(v16));
+}
+
+/*
+** spill_10:
+** csrr\tt0,vlenb
+** sub\tsp,sp,t0
+** vsetvli\ta5,zero,e32,mf2,ta,ma
+** vle32.v\tv24,0\(a0\)
+** csrr\ta3,vlenb
+** srli\ta3,a3,1
+** add\ta3,a3,sp
+** vse32.v\tv24,0\(a3\)
+** ...
+** csrr\ta3,vlenb
+** srli\ta3,a3,1
+** add\ta3,a3,sp
+** vle32.v\tv24,0\(a3\)
+** vse32.v\tv24,0\(a1\)
+** csrr\tt0,vlenb
+** add\tsp,sp,t0
+** ...
+** jr\tra
+*/
+void
+spill_10 (uint32_t *in, uint32_t *out)
+{
+ vuint32mf2_t v1 = *(vuint32mf2_t*)in;
+ exhaust_vector_regs ();
+ *(vuint32mf2_t*)out = v1;
+}
+
+/*
+** spill_11:
+** csrr\tt0,vlenb
+** sub\tsp,sp,t0
+** ...
+** vs1r.v\tv24,0\(sp\)
+** ...
+** vl1re32.v\tv2,0\(sp\)
+** vs1r.v\tv2,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_11 (uint32_t *in, uint32_t *out)
+{
+ register vuint32m1_t v1 asm("v1") = *(vuint32m1_t*)in;
+ asm volatile ("# %0"::"vr"(v1));
+ exhaust_vector_regs ();
+ register vuint32m1_t v2 asm("v2") = v1;
+ *(vuint32m1_t*)out = v2;
+ asm volatile ("# %0"::"vr"(v2));
+}
+
+/*
+** spill_12:
+** csrr\tt0,vlenb
+** slli\tt1,t0,1
+** sub\tsp,sp,t1
+** ...
+** vs2r.v\tv24,0\(sp\)
+** ...
+** vl2re32.v\tv4,0\(sp\)
+** vs2r.v\tv4,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_12 (uint32_t *in, uint32_t *out)
+{
+ register vuint32m2_t v2 asm("v2") = *(vuint32m2_t*)in;
+ asm volatile ("# %0"::"vr"(v2));
+ exhaust_vector_regs ();
+ register vuint32m2_t v4 asm("v4") = v2;
+ *(vuint32m2_t*)out = v4;
+ asm volatile ("# %0"::"vr"(v4));
+}
+
+/*
+** spill_13:
+** csrr\tt0,vlenb
+** slli\tt1,t0,2
+** sub\tsp,sp,t1
+** ...
+** vs4r.v\tv24,0\(sp\)
+** ...
+** vl4re32.v\tv8,0\(sp\)
+** vs4r.v\tv8,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_13 (uint32_t *in, uint32_t *out)
+{
+ register vuint32m4_t v4 asm("v4") = *(vuint32m4_t*)in;
+ asm volatile ("# %0"::"vr"(v4));
+ exhaust_vector_regs ();
+ register vuint32m4_t v8 asm("v8") = v4;
+ *(vuint32m4_t*)out = v8;
+ asm volatile ("# %0"::"vr"(v8));
+}
+
+/*
+** spill_14:
+** csrr\tt0,vlenb
+** slli\tt1,t0,3
+** sub\tsp,sp,t1
+** ...
+** vs8r.v\tv24,0\(sp\)
+** ...
+** vl8re32.v\tv16,0\(sp\)
+** vs8r.v\tv16,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_14 (uint32_t *in, uint32_t *out)
+{
+ register vuint32m8_t v8 asm("v8") = *(vuint32m8_t*)in;
+ asm volatile ("# %0"::"vr"(v8));
+ exhaust_vector_regs ();
+ register vuint32m8_t v16 asm("v16") = v8;
+ *(vuint32m8_t*)out = v16;
+ asm volatile ("# %0"::"vr"(v16));
+}
new file mode 100644
@@ -0,0 +1,196 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "riscv_vector.h"
+#include "macro.h"
+
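+/* There are no fractional-LMUL e64 types (SEW/LMUL may not exceed ELEN),
+   so the 64-bit tests start at m1; each case checks the whole-register
+   vl<N>re64.v reload and the matching vlenb shift (slli by 1, 2 or 3 for
+   m2, m4, m8) in the prologue and epilogue.  */
+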
+/*
+** spill_4:
+** csrr\tt0,vlenb
+** sub\tsp,sp,t0
+** ...
+** vs1r.v\tv24,0\(sp\)
+** ...
+** vl1re64.v\tv2,0\(sp\)
+** vs1r.v\tv2,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_4 (int64_t *in, int64_t *out)
+{
+ register vint64m1_t v1 asm("v1") = *(vint64m1_t*)in;
+ asm volatile ("# %0"::"vr"(v1));
+ exhaust_vector_regs ();
+ register vint64m1_t v2 asm("v2") = v1;
+ *(vint64m1_t*)out = v2;
+ asm volatile ("# %0"::"vr"(v2));
+}
+
+/*
+** spill_5:
+** csrr\tt0,vlenb
+** slli\tt1,t0,1
+** sub\tsp,sp,t1
+** ...
+** vs2r.v\tv24,0\(sp\)
+** ...
+** vl2re64.v\tv4,0\(sp\)
+** vs2r.v\tv4,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_5 (int64_t *in, int64_t *out)
+{
+ register vint64m2_t v2 asm("v2") = *(vint64m2_t*)in;
+ asm volatile ("# %0"::"vr"(v2));
+ exhaust_vector_regs ();
+ register vint64m2_t v4 asm("v4") = v2;
+ *(vint64m2_t*)out = v4;
+ asm volatile ("# %0"::"vr"(v4));
+}
+
+/*
+** spill_6:
+** csrr\tt0,vlenb
+** slli\tt1,t0,2
+** sub\tsp,sp,t1
+** ...
+** vs4r.v\tv24,0\(sp\)
+** ...
+** vl4re64.v\tv8,0\(sp\)
+** vs4r.v\tv8,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_6 (int64_t *in, int64_t *out)
+{
+ register vint64m4_t v4 asm("v4") = *(vint64m4_t*)in;
+ asm volatile ("# %0"::"vr"(v4));
+ exhaust_vector_regs ();
+ register vint64m4_t v8 asm("v8") = v4;
+ *(vint64m4_t*)out = v8;
+ asm volatile ("# %0"::"vr"(v8));
+}
+
+/*
+** spill_7:
+** csrr\tt0,vlenb
+** slli\tt1,t0,3
+** sub\tsp,sp,t1
+** ...
+** vs8r.v\tv24,0\(sp\)
+** ...
+** vl8re64.v\tv16,0\(sp\)
+** vs8r.v\tv16,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_7 (int64_t *in, int64_t *out)
+{
+ register vint64m8_t v8 asm("v8") = *(vint64m8_t*)in;
+ asm volatile ("# %0"::"vr"(v8));
+ exhaust_vector_regs ();
+ register vint64m8_t v16 asm("v16") = v8;
+ *(vint64m8_t*)out = v16;
+ asm volatile ("# %0"::"vr"(v16));
+}
+
+/*
+** spill_11:
+** csrr\tt0,vlenb
+** sub\tsp,sp,t0
+** ...
+** vs1r.v\tv24,0\(sp\)
+** ...
+** vl1re64.v\tv2,0\(sp\)
+** vs1r.v\tv2,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_11 (uint64_t *in, uint64_t *out)
+{
+ register vuint64m1_t v1 asm("v1") = *(vuint64m1_t*)in;
+ asm volatile ("# %0"::"vr"(v1));
+ exhaust_vector_regs ();
+ register vuint64m1_t v2 asm("v2") = v1;
+ *(vuint64m1_t*)out = v2;
+ asm volatile ("# %0"::"vr"(v2));
+}
+
+/*
+** spill_12:
+** csrr\tt0,vlenb
+** slli\tt1,t0,1
+** sub\tsp,sp,t1
+** ...
+** vs2r.v\tv24,0\(sp\)
+** ...
+** vl2re64.v\tv4,0\(sp\)
+** vs2r.v\tv4,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_12 (uint64_t *in, uint64_t *out)
+{
+ register vuint64m2_t v2 asm("v2") = *(vuint64m2_t*)in;
+ asm volatile ("# %0"::"vr"(v2));
+ exhaust_vector_regs ();
+ register vuint64m2_t v4 asm("v4") = v2;
+ *(vuint64m2_t*)out = v4;
+ asm volatile ("# %0"::"vr"(v4));
+}
+
+/*
+** spill_13:
+** csrr\tt0,vlenb
+** slli\tt1,t0,2
+** sub\tsp,sp,t1
+** ...
+** vs4r.v\tv24,0\(sp\)
+** ...
+** vl4re64.v\tv8,0\(sp\)
+** vs4r.v\tv8,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_13 (uint64_t *in, uint64_t *out)
+{
+ register vuint64m4_t v4 asm("v4") = *(vuint64m4_t*)in;
+ asm volatile ("# %0"::"vr"(v4));
+ exhaust_vector_regs ();
+ register vuint64m4_t v8 asm("v8") = v4;
+ *(vuint64m4_t*)out = v8;
+ asm volatile ("# %0"::"vr"(v8));
+}
+
+/*
+** spill_14:
+** csrr\tt0,vlenb
+** slli\tt1,t0,3
+** sub\tsp,sp,t1
+** ...
+** vs8r.v\tv24,0\(sp\)
+** ...
+** vl8re64.v\tv16,0\(sp\)
+** vs8r.v\tv16,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_14 (uint64_t *in, uint64_t *out)
+{
+ register vuint64m8_t v8 asm("v8") = *(vuint64m8_t*)in;
+ asm volatile ("# %0"::"vr"(v8));
+ exhaust_vector_regs ();
+ register vuint64m8_t v16 asm("v16") = v8;
+ *(vuint64m8_t*)out = v16;
+ asm volatile ("# %0"::"vr"(v16));
+}
new file mode 100644
@@ -0,0 +1,130 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "riscv_vector.h"
+#include "macro.h"
+
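+/* Single-precision vectors spill exactly like integer vectors of the same
+   LMUL: mf2 through vle32.v/vse32.v with a vlenb/2 offset, m1..m8 through
+   the whole-register vl<N>re32.v/vs<N>r.v pairs.  */
+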
+/*
+** spill_3:
+** csrr\tt0,vlenb
+** sub\tsp,sp,t0
+** vsetvli\ta5,zero,e32,mf2,ta,ma
+** vle32.v\tv24,0\(a0\)
+** csrr\ta3,vlenb
+** srli\ta3,a3,1
+** add\ta3,a3,sp
+** vse32.v\tv24,0\(a3\)
+** ...
+** csrr\ta3,vlenb
+** srli\ta3,a3,1
+** add\ta3,a3,sp
+** vle32.v\tv24,0\(a3\)
+** vse32.v\tv24,0\(a1\)
+** csrr\tt0,vlenb
+** add\tsp,sp,t0
+** ...
+** jr\tra
+*/
+void
+spill_3 (float *in, float *out)
+{
+ vfloat32mf2_t v1 = *(vfloat32mf2_t*)in;
+ exhaust_vector_regs ();
+ *(vfloat32mf2_t*)out = v1;
+}
+
+/*
+** spill_4:
+** csrr\tt0,vlenb
+** sub\tsp,sp,t0
+** ...
+** vs1r.v\tv24,0\(sp\)
+** ...
+** vl1re32.v\tv2,0\(sp\)
+** vs1r.v\tv2,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_4 (float *in, float *out)
+{
+ register vfloat32m1_t v1 asm("v1") = *(vfloat32m1_t*)in;
+ asm volatile ("# %0"::"vr"(v1));
+ exhaust_vector_regs ();
+ register vfloat32m1_t v2 asm("v2") = v1;
+ *(vfloat32m1_t*)out = v2;
+ asm volatile ("# %0"::"vr"(v2));
+}
+
+/*
+** spill_5:
+** csrr\tt0,vlenb
+** slli\tt1,t0,1
+** sub\tsp,sp,t1
+** ...
+** vs2r.v\tv24,0\(sp\)
+** ...
+** vl2re32.v\tv4,0\(sp\)
+** vs2r.v\tv4,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_5 (float *in, float *out)
+{
+ register vfloat32m2_t v2 asm("v2") = *(vfloat32m2_t*)in;
+ asm volatile ("# %0"::"vr"(v2));
+ exhaust_vector_regs ();
+ register vfloat32m2_t v4 asm("v4") = v2;
+ *(vfloat32m2_t*)out = v4;
+ asm volatile ("# %0"::"vr"(v4));
+}
+
+/*
+** spill_6:
+** csrr\tt0,vlenb
+** slli\tt1,t0,2
+** sub\tsp,sp,t1
+** ...
+** vs4r.v\tv24,0\(sp\)
+** ...
+** vl4re32.v\tv8,0\(sp\)
+** vs4r.v\tv8,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_6 (float *in, float *out)
+{
+ register vfloat32m4_t v4 asm("v4") = *(vfloat32m4_t*)in;
+ asm volatile ("# %0"::"vr"(v4));
+ exhaust_vector_regs ();
+ register vfloat32m4_t v8 asm("v8") = v4;
+ *(vfloat32m4_t*)out = v8;
+ asm volatile ("# %0"::"vr"(v8));
+}
+
+/*
+** spill_7:
+** csrr\tt0,vlenb
+** slli\tt1,t0,3
+** sub\tsp,sp,t1
+** ...
+** vs8r.v\tv24,0\(sp\)
+** ...
+** vl8re32.v\tv16,0\(sp\)
+** vs8r.v\tv16,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_7 (float *in, float *out)
+{
+ register vfloat32m8_t v8 asm("v8") = *(vfloat32m8_t*)in;
+ asm volatile ("# %0"::"vr"(v8));
+ exhaust_vector_regs ();
+ register vfloat32m8_t v16 asm("v16") = v8;
+ *(vfloat32m8_t*)out = v16;
+ asm volatile ("# %0"::"vr"(v16));
+}
new file mode 100644
@@ -0,0 +1,101 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "riscv_vector.h"
+#include "macro.h"
+
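+/* Double-precision counterpart of spill-5.c: m1..m8 spills of
+   vfloat64m<N>_t, reloaded with vl<N>re64.v and stored with vs<N>r.v.  */
+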
+/*
+** spill_4:
+** csrr\tt0,vlenb
+** sub\tsp,sp,t0
+** ...
+** vs1r.v\tv24,0\(sp\)
+** ...
+** vl1re64.v\tv2,0\(sp\)
+** vs1r.v\tv2,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_4 (double *in, double *out)
+{
+ register vfloat64m1_t v1 asm("v1") = *(vfloat64m1_t*)in;
+ asm volatile ("# %0"::"vr"(v1));
+ exhaust_vector_regs ();
+ register vfloat64m1_t v2 asm("v2") = v1;
+ *(vfloat64m1_t*)out = v2;
+ asm volatile ("# %0"::"vr"(v2));
+}
+
+/*
+** spill_5:
+** csrr\tt0,vlenb
+** slli\tt1,t0,1
+** sub\tsp,sp,t1
+** ...
+** vs2r.v\tv24,0\(sp\)
+** ...
+** vl2re64.v\tv4,0\(sp\)
+** vs2r.v\tv4,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_5 (double *in, double *out)
+{
+ register vfloat64m2_t v2 asm("v2") = *(vfloat64m2_t*)in;
+ asm volatile ("# %0"::"vr"(v2));
+ exhaust_vector_regs ();
+ register vfloat64m2_t v4 asm("v4") = v2;
+ *(vfloat64m2_t*)out = v4;
+ asm volatile ("# %0"::"vr"(v4));
+}
+
+/*
+** spill_6:
+** csrr\tt0,vlenb
+** slli\tt1,t0,2
+** sub\tsp,sp,t1
+** ...
+** vs4r.v\tv24,0\(sp\)
+** ...
+** vl4re64.v\tv8,0\(sp\)
+** vs4r.v\tv8,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_6 (double *in, double *out)
+{
+ register vfloat64m4_t v4 asm("v4") = *(vfloat64m4_t*)in;
+ asm volatile ("# %0"::"vr"(v4));
+ exhaust_vector_regs ();
+ register vfloat64m4_t v8 asm("v8") = v4;
+ *(vfloat64m4_t*)out = v8;
+ asm volatile ("# %0"::"vr"(v8));
+}
+
+/*
+** spill_7:
+** csrr\tt0,vlenb
+** slli\tt1,t0,3
+** sub\tsp,sp,t1
+** ...
+** vs8r.v\tv24,0\(sp\)
+** ...
+** vl8re64.v\tv16,0\(sp\)
+** vs8r.v\tv16,0\(a1\)
+** ...
+** jr\tra
+*/
+void
+spill_7 (double *in, double *out)
+{
+ register vfloat64m8_t v8 asm("v8") = *(vfloat64m8_t*)in;
+ asm volatile ("# %0"::"vr"(v8));
+ exhaust_vector_regs ();
+ register vfloat64m8_t v16 asm("v16") = v8;
+ *(vfloat64m8_t*)out = v16;
+ asm volatile ("# %0"::"vr"(v16));
+}
new file mode 100644
@@ -0,0 +1,114 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "riscv_vector.h"
+#include "macro.h"
+
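+/* All seven LMULs from mf8 to m8 are live across exhaust_vector_regs at
+   once.  Their slots need vlenb/8 + vlenb/4 + vlenb/2 + vlenb + 2*vlenb
+   + 4*vlenb + 8*vlenb = 15.875*vlenb of space, which the prologue rounds
+   up to the 16*vlenb frame checked below (slli t1,t0,4).  */
+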
+/*
+** spill:
+** csrr\tt0,vlenb
+** slli\tt1,t0,4
+** sub\tsp,sp,t1
+** vsetvli\ta3,zero,e8,mf8,ta,ma
+** vle8.v\tv24,0\(a0\)
+** csrr\ta5,vlenb
+** srli\ta5,a5,3
+** add\ta5,a5,sp
+** vse8.v\tv24,0\(a5\)
+** addi\ta5,a0,1
+** vsetvli\ta4,zero,e8,mf4,ta,ma
+** vle8.v\tv24,0\(a5\)
+** csrr\ta5,vlenb
+** srli\ta5,a5,2
+** add\ta5,a5,sp
+** vse8.v\tv24,0\(a5\)
+** addi\ta2,a0,2
+** vsetvli\ta5,zero,e8,mf2,ta,ma
+** vle8.v\tv24,0\(a2\)
+** csrr\ta2,vlenb
+** srli\ta2,a2,1
+** add\ta2,a2,sp
+** vse8.v\tv24,0\(a2\)
+** addi\ta2,a0,3
+** vl1re8.v\tv24,0\(a2\)
+** csrr\ta2,vlenb
+** add\ta2,a2,sp
+** vs1r.v\tv24,0\(a2\)
+** addi\ta2,a0,4
+** vl2re8.v\tv24,0\(a2\)
+** csrr\tt3,vlenb
+** slli\ta2,t3,1
+** add\ta2,a2,sp
+** vs2r.v\tv24,0\(a2\)
+** addi\ta2,a0,5
+** vl4re8.v\tv24,0\(a2\)
+** mv\ta2,t3
+** slli\tt3,t3,2
+** add\tt3,t3,sp
+** vs4r.v\tv24,0\(t3\)
+** addi\ta0,a0,6
+** vl8re8.v\tv24,0\(a0\)
+** slli\ta0,a2,3
+** add\ta0,a0,sp
+** vs8r.v\tv24,0\(a0\)
+** ...
+** srli\ta0,a2,3
+** add\ta0,a0,sp
+** ...
+** vle8.v\tv27,0\(a0\)
+** vse8.v\tv27,0\(a1\)
+** addi\ta3,a1,1
+** srli\ta0,a2,2
+** add\ta0,a0,sp
+** ...
+** vle8.v\tv27,0\(a0\)
+** vse8.v\tv27,0\(a3\)
+** addi\ta4,a1,2
+** srli\ta3,a2,1
+** add\ta3,a3,sp
+** ...
+** vle8.v\tv27,0\(a3\)
+** vse8.v\tv27,0\(a4\)
+** addi\ta5,a1,3
+** add\ta4,a2,sp
+** vl1re8.v\tv25,0\(a4\)
+** vs1r.v\tv25,0\(a5\)
+** addi\ta5,a1,4
+** slli\ta4,a2,1
+** add\ta4,a4,sp
+** vl2re8.v\tv26,0\(a4\)
+** vs2r.v\tv26,0\(a5\)
+** addi\ta5,a1,5
+** vl4re8.v\tv28,0\(t3\)
+** vs4r.v\tv28,0\(a5\)
+** addi\ta1,a1,6
+** slli\ta5,a2,3
+** add\ta5,a5,sp
+** vl8re8.v\tv24,0\(a5\)
+** vs8r.v\tv24,0\(a1\)
+** csrr\tt0,vlenb
+** slli\tt1,t0,4
+** add\tsp,sp,t1
+** ...
+** jr\tra
+*/
+void
+spill (int8_t *in, int8_t *out)
+{
+ vint8mf8_t v0 = *(vint8mf8_t*)in;
+ vint8mf4_t v1 = *(vint8mf4_t*)(in + 1);
+ vint8mf2_t v2 = *(vint8mf2_t*)(in + 2);
+ vint8m1_t v3 = *(vint8m1_t*)(in + 3);
+ vint8m2_t v4 = *(vint8m2_t*)(in + 4);
+ vint8m4_t v8 = *(vint8m4_t*)(in + 5);
+ vint8m8_t v16 = *(vint8m8_t*)(in + 6);
+ exhaust_vector_regs ();
+ *(vint8mf8_t*)out = v0;
+ *(vint8mf4_t*)(out + 1) = v1;
+ *(vint8mf2_t*)(out + 2) = v2;
+ *(vint8m1_t*)(out + 3) = v3;
+ *(vint8m2_t*)(out + 4) = v4;
+ *(vint8m4_t*)(out + 5) = v8;
+ *(vint8m8_t*)(out + 6) = v16;
+}
new file mode 100644
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+void f2 (char*);
+void f3 (char*, ...);
+
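+/* Scalar-only alloca baseline: with no vector state the epilogue restores
+   sp straight from the frame pointer (addi sp,s0,-16 / addi sp,s0,-48)
+   before reloading ra and s0; spill-9.c and spill-10.c check that adding
+   scalable spill slots keeps this shape.  */
+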
+/*
+** stack_check_alloca_1:
+** addi\tsp,sp,-48
+** sw\tra,12\(sp\)
+** sw\ts0,8\(sp\)
+** addi\ts0,sp,16
+** ...
+** addi\ta0,a0,23
+** andi\ta0,a0,-16
+** sub\tsp,sp,a0
+** ...
+** addi\tsp,s0,-16
+** lw\tra,12\(sp\)
+** lw\ts0,8\(sp\)
+** addi\tsp,sp,48
+** jr\tra
+*/
+void stack_check_alloca_1 (int y, ...)
+{
+ char* pStr = (char*)__builtin_alloca(y);
+ f2(pStr);
+}
+
+/*
+** stack_check_alloca_2:
+** addi\tsp,sp,-48
+** sw\tra,44\(sp\)
+** sw\ts0,40\(sp\)
+** addi\ts0,sp,48
+** addi\ta0,a0,23
+** andi\ta0,a0,-16
+** sub\tsp,sp,a0
+** ...
+** addi\tsp,s0,-48
+** lw\tra,44\(sp\)
+** lw\ts0,40\(sp\)
+** addi\tsp,sp,48
+** jr\tra
+*/
+void stack_check_alloca_2 (int y)
+{
+ char* pStr = (char*)__builtin_alloca(y);
+ f3(pStr, pStr, pStr, pStr, pStr, pStr, pStr, pStr, 2, pStr, pStr, pStr, 1);
+}
new file mode 100644
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "riscv_vector.h"
+
+void f (char*);
+
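+/* Same alloca shape as spill-8.c, but with live vector values: the
+   prologue additionally carves 2*vlenb (csrr/slli/sub) out of the stack
+   between the fixed save area and the dynamic allocation.  */
+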
+/*
+** stack_check_alloca_1:
+** addi\tsp,sp,-48
+** sw\tra,12\(sp\)
+** sw\ts0,8\(sp\)
+** addi\ts0,sp,16
+** csrr\tt0,vlenb
+** slli\tt1,t0,1
+** sub\tsp,sp,t1
+** ...
+** addi\ta2,a2,23
+** andi\ta2,a2,-16
+** sub\tsp,sp,a2
+** ...
+** lw\tra,12\(sp\)
+** lw\ts0,8\(sp\)
+** addi\tsp,sp,48
+** jr\tra
+*/
+void stack_check_alloca_1 (vuint8m1_t data, uint8_t *base, int y, ...)
+{
+ vuint8m8_t v0, v8, v16, v24;
+ asm volatile ("nop"
+ : "=vr" (v0), "=vr" (v8), "=vr" (v16), "=vr" (v24)
+ :
+ :);
+ asm volatile ("nop"
+ :
+ : "vr" (v0), "vr" (v8), "vr" (v16), "vr" (v24)
+ :);
+ *(vuint8m1_t *)base = data;
+ char* pStr = (char*)__builtin_alloca(y);
+ f(pStr);
+}