RISC-V: optimize stack manipulation in save-restore
Checks
Commit Message
The stack that save-restore reserves is not well accumulated in stack allocation and deallocation.
This patch allows less instructions to be used in stack allocation and deallocation if save-restore enabled,
and also a much clear logic for save-restore stack manipulation.
before patch:
bar:
call t0,__riscv_save_4
addi sp,sp,-64
...
li t0,-12288
addi t0,t0,-1968 # optimized out after patch
add sp,sp,t0 # prologue
...
li t0,12288 # epilogue
addi t0,t0,2000 # optimized out after patch
add sp,sp,t0
...
addi sp,sp,32
tail __riscv_restore_4
after patch:
bar:
call t0,__riscv_save_4
addi sp,sp,-2032
...
li t0,-12288
add sp,sp,t0 # prologue
...
li t0,12288 # epilogue
add sp,sp,t0
...
addi sp,sp,2032
tail __riscv_restore_4
gcc/ChangeLog:
* config/riscv/riscv.cc (riscv_first_stack_step): add a new function parameter remaining_size.
(riscv_compute_frame_info): adapt new riscv_first_stack_step interface.
(riscv_expand_prologue): consider save-restore in stack allocation.
(riscv_expand_epilogue): consider save-restore in stack deallocation.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/stack_save_restore.c: New test.
---
gcc/config/riscv/riscv.cc | 58 ++++++++++---------
.../gcc.target/riscv/stack_save_restore.c | 40 +++++++++++++
2 files changed, 70 insertions(+), 28 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/stack_save_restore.c
Comments
On Wed, 30 Nov 2022 00:37:17 PST (-0800), gaofei@eswincomputing.com wrote:
> The stack that save-restore reserves is not well accumulated in stack allocation and deallocation.
> This patch allows less instructions to be used in stack allocation and deallocation if save-restore enabled,
> and also a much clear logic for save-restore stack manipulation.
>
> before patch:
> bar:
> call t0,__riscv_save_4
> addi sp,sp,-64
> ...
> li t0,-12288
> addi t0,t0,-1968 # optimized out after patch
> add sp,sp,t0 # prologue
> ...
> li t0,12288 # epilogue
> addi t0,t0,2000 # optimized out after patch
> add sp,sp,t0
> ...
> addi sp,sp,32
> tail __riscv_restore_4
>
> after patch:
> bar:
> call t0,__riscv_save_4
> addi sp,sp,-2032
> ...
> li t0,-12288
> add sp,sp,t0 # prologue
> ...
> li t0,12288 # epilogue
> add sp,sp,t0
> ...
> addi sp,sp,2032
> tail __riscv_restore_4
>
> gcc/ChangeLog:
>
> * config/riscv/riscv.cc (riscv_first_stack_step): add a new function parameter remaining_size.
> (riscv_compute_frame_info): adapt new riscv_first_stack_step interface.
> (riscv_expand_prologue): consider save-restore in stack allocation.
> (riscv_expand_epilogue): consider save-restore in stack deallocation.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/stack_save_restore.c: New test.
> ---
> gcc/config/riscv/riscv.cc | 58 ++++++++++---------
> .../gcc.target/riscv/stack_save_restore.c | 40 +++++++++++++
> 2 files changed, 70 insertions(+), 28 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/riscv/stack_save_restore.c
I guess with the RISC-V backend still being open for things as big as
the V port we should probably be taking code like this as well? I
wouldn't be opposed to making an exception for the V code and holding
everything else back, though.
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index 05bdba5ab4d..9e92e729a5f 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -4634,7 +4634,7 @@ riscv_save_libcall_count (unsigned mask)
> They decrease stack_pointer_rtx but leave frame_pointer_rtx and
> hard_frame_pointer_rtx unchanged. */
>
> -static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame);
> +static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size);
>
> /* Handle stack align for poly_int. */
> static poly_int64
> @@ -4663,7 +4663,7 @@ riscv_compute_frame_info (void)
> save/restore t0. We check for this before clearing the frame struct. */
> if (cfun->machine->interrupt_handler_p)
> {
> - HOST_WIDE_INT step1 = riscv_first_stack_step (frame);
> + HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size);
> if (! POLY_SMALL_OPERAND_P ((frame->total_size - step1)))
> interrupt_save_prologue_temp = true;
> }
> @@ -4913,31 +4913,31 @@ riscv_restore_reg (rtx reg, rtx mem)
> without adding extra instructions. */
>
> static HOST_WIDE_INT
> -riscv_first_stack_step (struct riscv_frame_info *frame)
> +riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size)
> {
> - HOST_WIDE_INT frame_total_constant_size;
> - if (!frame->total_size.is_constant ())
> - frame_total_constant_size
> - = riscv_stack_align (frame->total_size.coeffs[0])
> - - riscv_stack_align (frame->total_size.coeffs[1]);
> + HOST_WIDE_INT remaining_const_size;
> + if (!remaining_size.is_constant ())
> + remaining_const_size
> + = riscv_stack_align (remaining_size.coeffs[0])
> + - riscv_stack_align (remaining_size.coeffs[1]);
The alignment looks off here, at least in the email. Worth fixing it up
if you're touching the lines anyway.
> else
> - frame_total_constant_size = frame->total_size.to_constant ();
> + remaining_const_size = remaining_size.to_constant ();
>
> - if (SMALL_OPERAND (frame_total_constant_size))
> - return frame_total_constant_size;
> + if (SMALL_OPERAND (remaining_const_size))
> + return remaining_const_size;
>
> HOST_WIDE_INT min_first_step =
> - RISCV_STACK_ALIGN ((frame->total_size - frame->frame_pointer_offset).to_constant());
> + RISCV_STACK_ALIGN ((remaining_size - frame->frame_pointer_offset).to_constant());
> HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8;
> - HOST_WIDE_INT min_second_step = frame_total_constant_size - max_first_step;
> + HOST_WIDE_INT min_second_step = remaining_const_size - max_first_step;
> gcc_assert (min_first_step <= max_first_step);
>
> /* As an optimization, use the least-significant bits of the total frame
> size, so that the second adjustment step is just LUI + ADD. */
> if (!SMALL_OPERAND (min_second_step)
> - && frame_total_constant_size % IMM_REACH < IMM_REACH / 2
> - && frame_total_constant_size % IMM_REACH >= min_first_step)
> - return frame_total_constant_size % IMM_REACH;
> + && remaining_const_size % IMM_REACH < IMM_REACH / 2
> + && remaining_const_size % IMM_REACH >= min_first_step)
> + return remaining_const_size % IMM_REACH;
Looks like this entire frame->total_size -> remaining_size conversion
could be done as an independent patch that would change no
functionality, that's always a nice way to do things as it makes the
code easier to read.
I spent a bit poking around here and nothing wrong is jumping out, but
trying to keep all these offset differences in my head is a bit tricky.
If you have the time to refactor this to be easier to read that'd be
great, otherwise hopefully I (or someone else) will have the time to
take a look -- probably not today on my end, though, as I've got some
Linux backlog to look at.
Thanks!
> if (TARGET_RVC)
> {
> @@ -5037,9 +5037,7 @@ riscv_expand_prologue (void)
> /* Save the registers. */
> if ((frame->mask | frame->fmask) != 0)
> {
> - HOST_WIDE_INT step1 = riscv_first_stack_step (frame);
> - if (size.is_constant ())
> - step1 = MIN (size.to_constant(), step1);
> + HOST_WIDE_INT step1 = riscv_first_stack_step (frame, size);
>
> insn = gen_add3_insn (stack_pointer_rtx,
> stack_pointer_rtx,
> @@ -5142,6 +5140,8 @@ riscv_expand_epilogue (int style)
> HOST_WIDE_INT step2 = 0;
> bool use_restore_libcall = ((style == NORMAL_RETURN)
> && riscv_use_save_libcall (frame));
> + unsigned libcall_size = use_restore_libcall ?
> + frame->save_libcall_adjustment : 0;
> rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
> rtx insn;
>
> @@ -5212,13 +5212,18 @@ riscv_expand_epilogue (int style)
> REG_NOTES (insn) = dwarf;
> }
>
> + if (use_restore_libcall)
> + frame->mask = 0; /* Temporarily fib for GPRs. */
> +
> /* If we need to restore registers, deallocate as much stack as
> possible in the second step without going out of range. */
> if ((frame->mask | frame->fmask) != 0)
> - {
> - step2 = riscv_first_stack_step (frame);
> - step1 -= step2;
> - }
> + step2 = riscv_first_stack_step (frame, frame->total_size - libcall_size);
> +
> + if (use_restore_libcall)
> + frame->mask = mask; /* Undo the above fib. */
> +
> + step1 -= step2 + libcall_size;
>
> /* Set TARGET to BASE + STEP1. */
> if (known_gt (step1, 0))
> @@ -5272,15 +5277,12 @@ riscv_expand_epilogue (int style)
> frame->mask = 0; /* Temporarily fib that we need not save GPRs. */
>
> /* Restore the registers. */
> - riscv_for_each_saved_reg (frame->total_size - step2, riscv_restore_reg,
> + riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size,
> + riscv_restore_reg,
> true, style == EXCEPTION_RETURN);
>
> if (use_restore_libcall)
> - {
> frame->mask = mask; /* Undo the above fib. */
> - gcc_assert (step2 >= frame->save_libcall_adjustment);
> - step2 -= frame->save_libcall_adjustment;
> - }
>
> if (need_barrier_p)
> riscv_emit_stack_tie ();
> diff --git a/gcc/testsuite/gcc.target/riscv/stack_save_restore.c b/gcc/testsuite/gcc.target/riscv/stack_save_restore.c
> new file mode 100644
> index 00000000000..4695ef9469a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/stack_save_restore.c
> @@ -0,0 +1,40 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32imafc -mabi=ilp32f -msave-restore -O2 -fno-schedule-insns -fno-schedule-insns2 -fno-unroll-loops -fno-peel-loops" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +char my_getchar();
> +float getf();
> +
> +/*
> +**bar:
> +** call t0,__riscv_save_4
> +** addi sp,sp,-2032
> +** ...
> +** li t0,-12288
> +** add sp,sp,t0
> +** ...
> +** li t0,12288
> +** add sp,sp,t0
> +** ...
> +** addi sp,sp,2032
> +** tail __riscv_restore_4
> +*/
The test needs to actually check this, it can't just be manual.
> +int bar()
> +{
> + float volatile farray[3568];
> +
> + float sum = 0;
> + float f1 = getf();
> + float f2 = getf();
> + float f3 = getf();
> + float f4 = getf();
> +
> + for (int i = 0; i < 3568; i++)
> + {
> + farray[i] = my_getchar() * 1.2;
> + sum += farray[i];
> + }
> +
> + return sum + f1 + f2 + f3 + f4;
> +}
> +
On 2022-12-01 06:50 Palmer Dabbelt <palmer@dabbelt.com> wrote:
>
>On Wed, 30 Nov 2022 00:37:17 PST (-0800), gaofei@eswincomputing.com wrote:
>> The stack that save-restore reserves is not well accumulated in stack allocation and deallocation.
>> This patch allows less instructions to be used in stack allocation and deallocation if save-restore enabled,
>> and also a much clear logic for save-restore stack manipulation.
>>
>> before patch:
>> bar:
>> call t0,__riscv_save_4
>> addi sp,sp,-64
>> ...
>> li t0,-12288
>> addi t0,t0,-1968 # optimized out after patch
>> add sp,sp,t0 # prologue
>> ...
>> li t0,12288 # epilogue
>> addi t0,t0,2000 # optimized out after patch
>> add sp,sp,t0
>> ...
>> addi sp,sp,32
>> tail __riscv_restore_4
>>
>> after patch:
>> bar:
>> call t0,__riscv_save_4
>> addi sp,sp,-2032
>> ...
>> li t0,-12288
>> add sp,sp,t0 # prologue
>> ...
>> li t0,12288 # epilogue
>> add sp,sp,t0
>> ...
>> addi sp,sp,2032
>> tail __riscv_restore_4
>>
>> gcc/ChangeLog:
>>
>> * config/riscv/riscv.cc (riscv_first_stack_step): add a new function parameter remaining_size.
>> (riscv_compute_frame_info): adapt new riscv_first_stack_step interface.
>> (riscv_expand_prologue): consider save-restore in stack allocation.
>> (riscv_expand_epilogue): consider save-restore in stack deallocation.
>>
>> gcc/testsuite/ChangeLog:
>>
>> * gcc.target/riscv/stack_save_restore.c: New test.
>> ---
>> gcc/config/riscv/riscv.cc | 58 ++++++++++---------
>> .../gcc.target/riscv/stack_save_restore.c | 40 +++++++++++++
>> 2 files changed, 70 insertions(+), 28 deletions(-)
>> create mode 100644 gcc/testsuite/gcc.target/riscv/stack_save_restore.c
>
>I guess with the RISC-V backend still being open for things as big as
>the V port we should probably be taking code like this as well? I
>wouldn't be opposed to making an exception for the V code and holding
>everything else back, though.
>
>> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
>> index 05bdba5ab4d..9e92e729a5f 100644
>> --- a/gcc/config/riscv/riscv.cc
>> +++ b/gcc/config/riscv/riscv.cc
>> @@ -4634,7 +4634,7 @@ riscv_save_libcall_count (unsigned mask)
>> They decrease stack_pointer_rtx but leave frame_pointer_rtx and
>> hard_frame_pointer_rtx unchanged. */
>>
>> -static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame);
>> +static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size);
>>
>> /* Handle stack align for poly_int. */
>> static poly_int64
>> @@ -4663,7 +4663,7 @@ riscv_compute_frame_info (void)
>> save/restore t0. We check for this before clearing the frame struct. */
>> if (cfun->machine->interrupt_handler_p)
>> {
>> - HOST_WIDE_INT step1 = riscv_first_stack_step (frame);
>> + HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size);
>> if (! POLY_SMALL_OPERAND_P ((frame->total_size - step1)))
>> interrupt_save_prologue_temp = true;
>> }
>> @@ -4913,31 +4913,31 @@ riscv_restore_reg (rtx reg, rtx mem)
>> without adding extra instructions. */
>>
>> static HOST_WIDE_INT
>> -riscv_first_stack_step (struct riscv_frame_info *frame)
>> +riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size)
>> {
>> - HOST_WIDE_INT frame_total_constant_size;
>> - if (!frame->total_size.is_constant ())
>> - frame_total_constant_size
>> - = riscv_stack_align (frame->total_size.coeffs[0])
>> - - riscv_stack_align (frame->total_size.coeffs[1]);
>> + HOST_WIDE_INT remaining_const_size;
>> + if (!remaining_size.is_constant ())
>> + remaining_const_size
>> + = riscv_stack_align (remaining_size.coeffs[0])
>> + - riscv_stack_align (remaining_size.coeffs[1]);
>
>The alignment looks off here, at least in the email. Worth fixing it up
>if you're touching the lines anyway.
Sure, i will change RISCV_STACK_ALIGN into riscv_stack_align.
>
>> else
>> - frame_total_constant_size = frame->total_size.to_constant ();
>> + remaining_const_size = remaining_size.to_constant ();
>>
>> - if (SMALL_OPERAND (frame_total_constant_size))
>> - return frame_total_constant_size;
>> + if (SMALL_OPERAND (remaining_const_size))
>> + return remaining_const_size;
>>
>> HOST_WIDE_INT min_first_step =
>> - RISCV_STACK_ALIGN ((frame->total_size - frame->frame_pointer_offset).to_constant());
>> + RISCV_STACK_ALIGN ((remaining_size - frame->frame_pointer_offset).to_constant());
>> HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8;
>> - HOST_WIDE_INT min_second_step = frame_total_constant_size - max_first_step;
>> + HOST_WIDE_INT min_second_step = remaining_const_size - max_first_step;
>> gcc_assert (min_first_step <= max_first_step);
>>
>> /* As an optimization, use the least-significant bits of the total frame
>> size, so that the second adjustment step is just LUI + ADD. */
>> if (!SMALL_OPERAND (min_second_step)
>> - && frame_total_constant_size % IMM_REACH < IMM_REACH / 2
>> - && frame_total_constant_size % IMM_REACH >= min_first_step)
>> - return frame_total_constant_size % IMM_REACH;
>> + && remaining_const_size % IMM_REACH < IMM_REACH / 2
>> + && remaining_const_size % IMM_REACH >= min_first_step)
>> + return remaining_const_size % IMM_REACH;
>
>Looks like this entire frame->total_size -> remaining_size conversion
>could be done as an independent patch that would change no
>functionality, that's always a nice way to do things as it makes the
>code easier to read.
Sure, i will split this patch.
>
>I spent a bit poking around here and nothing wrong is jumping out, but
>trying to keep all these offset differences in my head is a bit tricky.
>If you have the time to refactor this to be easier to read that'd be
>great, otherwise hopefully I (or someone else) will have the time to
>take a look -- probably not today on my end, though, as I've got some
>Linux backlog to look at.
I'll try it. Also i propose to add step0 for pre-allocated stack cases like save-restore lib call,
and the future Zcmp in Zc* extension if needed.
So we have a clear logic of manipulate stack step by step.
>
>Thanks!
>
>> if (TARGET_RVC)
>> {
>> @@ -5037,9 +5037,7 @@ riscv_expand_prologue (void)
>> /* Save the registers. */
>> if ((frame->mask | frame->fmask) != 0)
>> {
>> - HOST_WIDE_INT step1 = riscv_first_stack_step (frame);
>> - if (size.is_constant ())
>> - step1 = MIN (size.to_constant(), step1);
>> + HOST_WIDE_INT step1 = riscv_first_stack_step (frame, size);
>>
>> insn = gen_add3_insn (stack_pointer_rtx,
>> stack_pointer_rtx,
>> @@ -5142,6 +5140,8 @@ riscv_expand_epilogue (int style)
>> HOST_WIDE_INT step2 = 0;
>> bool use_restore_libcall = ((style == NORMAL_RETURN)
>> && riscv_use_save_libcall (frame));
>> + unsigned libcall_size = use_restore_libcall ?
>> + frame->save_libcall_adjustment : 0;
>> rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
>> rtx insn;
>>
>> @@ -5212,13 +5212,18 @@ riscv_expand_epilogue (int style)
>> REG_NOTES (insn) = dwarf;
>> }
>>
>> + if (use_restore_libcall)
>> + frame->mask = 0; /* Temporarily fib for GPRs. */
>> +
>> /* If we need to restore registers, deallocate as much stack as
>> possible in the second step without going out of range. */
>> if ((frame->mask | frame->fmask) != 0)
>> - {
>> - step2 = riscv_first_stack_step (frame);
>> - step1 -= step2;
>> - }
>> + step2 = riscv_first_stack_step (frame, frame->total_size - libcall_size);
>> +
>> + if (use_restore_libcall)
>> + frame->mask = mask; /* Undo the above fib. */
>> +
>> + step1 -= step2 + libcall_size;
>>
>> /* Set TARGET to BASE + STEP1. */
>> if (known_gt (step1, 0))
>> @@ -5272,15 +5277,12 @@ riscv_expand_epilogue (int style)
>> frame->mask = 0; /* Temporarily fib that we need not save GPRs. */
>>
>> /* Restore the registers. */
>> - riscv_for_each_saved_reg (frame->total_size - step2, riscv_restore_reg,
>> + riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size,
>> + riscv_restore_reg,
>> true, style == EXCEPTION_RETURN);
>>
>> if (use_restore_libcall)
>> - {
>> frame->mask = mask; /* Undo the above fib. */
>> - gcc_assert (step2 >= frame->save_libcall_adjustment);
>> - step2 -= frame->save_libcall_adjustment;
>> - }
>>
>> if (need_barrier_p)
>> riscv_emit_stack_tie ();
>> diff --git a/gcc/testsuite/gcc.target/riscv/stack_save_restore.c b/gcc/testsuite/gcc.target/riscv/stack_save_restore.c
>> new file mode 100644
>> index 00000000000..4695ef9469a
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/stack_save_restore.c
>> @@ -0,0 +1,40 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-march=rv32imafc -mabi=ilp32f -msave-restore -O2 -fno-schedule-insns -fno-schedule-insns2 -fno-unroll-loops -fno-peel-loops" } */
>> +/* { dg-final { check-function-bodies "**" "" } } */
>> +
>> +char my_getchar();
>> +float getf();
>> +
>> +/*
>> +**bar:
>> +** call t0,__riscv_save_4
>> +** addi sp,sp,-2032
>> +** ...
>> +** li t0,-12288
>> +** add sp,sp,t0
>> +** ...
>> +** li t0,12288
>> +** add sp,sp,t0
>> +** ...
>> +** addi sp,sp,2032
>> +** tail __riscv_restore_4
>> +*/
>
>The test needs to actually check this, it can't just be manual.
I didn't get your point.
The { dg-final { check-function-bodies "**" "" } } matches the output with the expected result above automatically.
Please let me know your idea.
Thanks & BR,
Fei
>
>> +int bar()
>> +{
>> + float volatile farray[3568];
>> +
>> + float sum = 0;
>> + float f1 = getf();
>> + float f2 = getf();
>> + float f3 = getf();
>> + float f4 = getf();
>> +
>> + for (int i = 0; i < 3568; i++)
>> + {
>> + farray[i] = my_getchar() * 1.2;
>> + sum += farray[i];
>> + }
>> +
>> + return sum + f1 + f2 + f3 + f4;
>> +}
>> +
Hi Palmer and all,
I have split the patches and triggerred a new thread.
https://www.mail-archive.com/gcc-patches@gcc.gnu.org/msg297206.html
Could you please review at your convenience?
Thanks & BR,
Fei
On 2022-12-01 11:07 Fei Gao <gaofei@eswincomputing.com> wrote:
>
>On 2022-12-01 06:50 Palmer Dabbelt <palmer@dabbelt.com> wrote:
>>
>>On Wed, 30 Nov 2022 00:37:17 PST (-0800), gaofei@eswincomputing.com wrote:
>>> The stack that save-restore reserves is not well accumulated in stack allocation and deallocation.
>>> This patch allows less instructions to be used in stack allocation and deallocation if save-restore enabled,
>>> and also a much clear logic for save-restore stack manipulation.
>>>
>>> before patch:
>>> bar:
>>> call t0,__riscv_save_4
>>> addi sp,sp,-64
>>> ...
>>> li t0,-12288
>>> addi t0,t0,-1968 # optimized out after patch
>>> add sp,sp,t0 # prologue
>>> ...
>>> li t0,12288 # epilogue
>>> addi t0,t0,2000 # optimized out after patch
>>> add sp,sp,t0
>>> ...
>>> addi sp,sp,32
>>> tail __riscv_restore_4
>>>
>>> after patch:
>>> bar:
>>> call t0,__riscv_save_4
>>> addi sp,sp,-2032
>>> ...
>>> li t0,-12288
>>> add sp,sp,t0 # prologue
>>> ...
>>> li t0,12288 # epilogue
>>> add sp,sp,t0
>>> ...
>>> addi sp,sp,2032
>>> tail __riscv_restore_4
>>>
>>> gcc/ChangeLog:
>>>
>>> * config/riscv/riscv.cc (riscv_first_stack_step): add a new function parameter remaining_size.
>>> (riscv_compute_frame_info): adapt new riscv_first_stack_step interface.
>>> (riscv_expand_prologue): consider save-restore in stack allocation.
>>> (riscv_expand_epilogue): consider save-restore in stack deallocation.
>>>
>>> gcc/testsuite/ChangeLog:
>>>
>>> * gcc.target/riscv/stack_save_restore.c: New test.
>>> ---
>>> gcc/config/riscv/riscv.cc | 58 ++++++++++---------
>>> .../gcc.target/riscv/stack_save_restore.c | 40 +++++++++++++
>>> 2 files changed, 70 insertions(+), 28 deletions(-)
>>> create mode 100644 gcc/testsuite/gcc.target/riscv/stack_save_restore.c
>>
>>I guess with the RISC-V backend still being open for things as big as
>>the V port we should probably be taking code like this as well? I
>>wouldn't be opposed to making an exception for the V code and holding
>>everything else back, though.
>>
>>> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
>>> index 05bdba5ab4d..9e92e729a5f 100644
>>> --- a/gcc/config/riscv/riscv.cc
>>> +++ b/gcc/config/riscv/riscv.cc
>>> @@ -4634,7 +4634,7 @@ riscv_save_libcall_count (unsigned mask)
>>> They decrease stack_pointer_rtx but leave frame_pointer_rtx and
>>> hard_frame_pointer_rtx unchanged. */
>>>
>>> -static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame);
>>> +static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size);
>>>
>>> /* Handle stack align for poly_int. */
>>> static poly_int64
>>> @@ -4663,7 +4663,7 @@ riscv_compute_frame_info (void)
>>> save/restore t0. We check for this before clearing the frame struct. */
>>> if (cfun->machine->interrupt_handler_p)
>>> {
>>> - HOST_WIDE_INT step1 = riscv_first_stack_step (frame);
>>> + HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size);
>>> if (! POLY_SMALL_OPERAND_P ((frame->total_size - step1)))
>>> interrupt_save_prologue_temp = true;
>>> }
>>> @@ -4913,31 +4913,31 @@ riscv_restore_reg (rtx reg, rtx mem)
>>> without adding extra instructions. */
>>>
>>> static HOST_WIDE_INT
>>> -riscv_first_stack_step (struct riscv_frame_info *frame)
>>> +riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size)
>>> {
>>> - HOST_WIDE_INT frame_total_constant_size;
>>> - if (!frame->total_size.is_constant ())
>>> - frame_total_constant_size
>>> - = riscv_stack_align (frame->total_size.coeffs[0])
>>> - - riscv_stack_align (frame->total_size.coeffs[1]);
>>> + HOST_WIDE_INT remaining_const_size;
>>> + if (!remaining_size.is_constant ())
>>> + remaining_const_size
>>> + = riscv_stack_align (remaining_size.coeffs[0])
>>> + - riscv_stack_align (remaining_size.coeffs[1]);
>>
>>The alignment looks off here, at least in the email. Worth fixing it up
>>if you're touching the lines anyway.
>
>Sure, i will change RISCV_STACK_ALIGN into riscv_stack_align.
>
>>
>>> else
>>> - frame_total_constant_size = frame->total_size.to_constant ();
>>> + remaining_const_size = remaining_size.to_constant ();
>>>
>>> - if (SMALL_OPERAND (frame_total_constant_size))
>>> - return frame_total_constant_size;
>>> + if (SMALL_OPERAND (remaining_const_size))
>>> + return remaining_const_size;
>>>
>>> HOST_WIDE_INT min_first_step =
>>> - RISCV_STACK_ALIGN ((frame->total_size - frame->frame_pointer_offset).to_constant());
>>> + RISCV_STACK_ALIGN ((remaining_size - frame->frame_pointer_offset).to_constant());
>>> HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8;
>>> - HOST_WIDE_INT min_second_step = frame_total_constant_size - max_first_step;
>>> + HOST_WIDE_INT min_second_step = remaining_const_size - max_first_step;
>>> gcc_assert (min_first_step <= max_first_step);
>>>
>>> /* As an optimization, use the least-significant bits of the total frame
>>> size, so that the second adjustment step is just LUI + ADD. */
>>> if (!SMALL_OPERAND (min_second_step)
>>> - && frame_total_constant_size % IMM_REACH < IMM_REACH / 2
>>> - && frame_total_constant_size % IMM_REACH >= min_first_step)
>>> - return frame_total_constant_size % IMM_REACH;
>>> + && remaining_const_size % IMM_REACH < IMM_REACH / 2
>>> + && remaining_const_size % IMM_REACH >= min_first_step)
>>> + return remaining_const_size % IMM_REACH;
>>
>>Looks like this entire frame->total_size -> remaining_size conversion
>>could be done as an independent patch that would change no
>>functionality, that's always a nice way to do things as it makes the
>>code easier to read.
>
>Sure, i will split this patch.
>
>>
>>I spent a bit poking around here and nothing wrong is jumping out, but
>>trying to keep all these offset differences in my head is a bit tricky.
>>If you have the time to refactor this to be easier to read that'd be
>>great, otherwise hopefully I (or someone else) will have the time to
>>take a look -- probably not today on my end, though, as I've got some
>>Linux backlog to look at.
>
>I'll try it. Also i propose to add step0 for pre-allocated stack cases like save-restore lib call,
>and the future Zcmp in Zc* extension if needed.
>So we have a clear logic of manipulate stack step by step.
>
>>
>>Thanks!
>>
>>> if (TARGET_RVC)
>>> {
>>> @@ -5037,9 +5037,7 @@ riscv_expand_prologue (void)
>>> /* Save the registers. */
>>> if ((frame->mask | frame->fmask) != 0)
>>> {
>>> - HOST_WIDE_INT step1 = riscv_first_stack_step (frame);
>>> - if (size.is_constant ())
>>> - step1 = MIN (size.to_constant(), step1);
>>> + HOST_WIDE_INT step1 = riscv_first_stack_step (frame, size);
>>>
>>> insn = gen_add3_insn (stack_pointer_rtx,
>>> stack_pointer_rtx,
>>> @@ -5142,6 +5140,8 @@ riscv_expand_epilogue (int style)
>>> HOST_WIDE_INT step2 = 0;
>>> bool use_restore_libcall = ((style == NORMAL_RETURN)
>>> && riscv_use_save_libcall (frame));
>>> + unsigned libcall_size = use_restore_libcall ?
>>> + frame->save_libcall_adjustment : 0;
>>> rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
>>> rtx insn;
>>>
>>> @@ -5212,13 +5212,18 @@ riscv_expand_epilogue (int style)
>>> REG_NOTES (insn) = dwarf;
>>> }
>>>
>>> + if (use_restore_libcall)
>>> + frame->mask = 0; /* Temporarily fib for GPRs. */
>>> +
>>> /* If we need to restore registers, deallocate as much stack as
>>> possible in the second step without going out of range. */
>>> if ((frame->mask | frame->fmask) != 0)
>>> - {
>>> - step2 = riscv_first_stack_step (frame);
>>> - step1 -= step2;
>>> - }
>>> + step2 = riscv_first_stack_step (frame, frame->total_size - libcall_size);
>>> +
>>> + if (use_restore_libcall)
>>> + frame->mask = mask; /* Undo the above fib. */
>>> +
>>> + step1 -= step2 + libcall_size;
>>>
>>> /* Set TARGET to BASE + STEP1. */
>>> if (known_gt (step1, 0))
>>> @@ -5272,15 +5277,12 @@ riscv_expand_epilogue (int style)
>>> frame->mask = 0; /* Temporarily fib that we need not save GPRs. */
>>>
>>> /* Restore the registers. */
>>> - riscv_for_each_saved_reg (frame->total_size - step2, riscv_restore_reg,
>>> + riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size,
>>> + riscv_restore_reg,
>>> true, style == EXCEPTION_RETURN);
>>>
>>> if (use_restore_libcall)
>>> - {
>>> frame->mask = mask; /* Undo the above fib. */
>>> - gcc_assert (step2 >= frame->save_libcall_adjustment);
>>> - step2 -= frame->save_libcall_adjustment;
>>> - }
>>>
>>> if (need_barrier_p)
>>> riscv_emit_stack_tie ();
>>> diff --git a/gcc/testsuite/gcc.target/riscv/stack_save_restore.c b/gcc/testsuite/gcc.target/riscv/stack_save_restore.c
>>> new file mode 100644
>>> index 00000000000..4695ef9469a
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/riscv/stack_save_restore.c
>>> @@ -0,0 +1,40 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-options "-march=rv32imafc -mabi=ilp32f -msave-restore -O2 -fno-schedule-insns -fno-schedule-insns2 -fno-unroll-loops -fno-peel-loops" } */
>>> +/* { dg-final { check-function-bodies "**" "" } } */
>>> +
>>> +char my_getchar();
>>> +float getf();
>>> +
>>> +/*
>>> +**bar:
>>> +** call t0,__riscv_save_4
>>> +** addi sp,sp,-2032
>>> +** ...
>>> +** li t0,-12288
>>> +** add sp,sp,t0
>>> +** ...
>>> +** li t0,12288
>>> +** add sp,sp,t0
>>> +** ...
>>> +** addi sp,sp,2032
>>> +** tail __riscv_restore_4
>>> +*/
>>
>>The test needs to actually check this, it can't just be manual.
>
>I didn't get your point.
>The { dg-final { check-function-bodies "**" "" } } matches the output with the expected result above automatically.
>Please let me know your idea.
>
>Thanks & BR,
>Fei
>
>>
>>> +int bar()
>>> +{
>>> + float volatile farray[3568];
>>> +
>>> + float sum = 0;
>>> + float f1 = getf();
>>> + float f2 = getf();
>>> + float f3 = getf();
>>> + float f4 = getf();
>>> +
>>> + for (int i = 0; i < 3568; i++)
>>> + {
>>> + farray[i] = my_getchar() * 1.2;
>>> + sum += farray[i];
>>> + }
>>> +
>>> + return sum + f1 + f2 + f3 + f4;
>>> +}
>>> +
@@ -4634,7 +4634,7 @@ riscv_save_libcall_count (unsigned mask)
They decrease stack_pointer_rtx but leave frame_pointer_rtx and
hard_frame_pointer_rtx unchanged. */
-static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame);
+static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size);
/* Handle stack align for poly_int. */
static poly_int64
@@ -4663,7 +4663,7 @@ riscv_compute_frame_info (void)
save/restore t0. We check for this before clearing the frame struct. */
if (cfun->machine->interrupt_handler_p)
{
- HOST_WIDE_INT step1 = riscv_first_stack_step (frame);
+ HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size);
if (! POLY_SMALL_OPERAND_P ((frame->total_size - step1)))
interrupt_save_prologue_temp = true;
}
@@ -4913,31 +4913,31 @@ riscv_restore_reg (rtx reg, rtx mem)
without adding extra instructions. */
static HOST_WIDE_INT
-riscv_first_stack_step (struct riscv_frame_info *frame)
+riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size)
{
- HOST_WIDE_INT frame_total_constant_size;
- if (!frame->total_size.is_constant ())
- frame_total_constant_size
- = riscv_stack_align (frame->total_size.coeffs[0])
- - riscv_stack_align (frame->total_size.coeffs[1]);
+ HOST_WIDE_INT remaining_const_size;
+ if (!remaining_size.is_constant ())
+ remaining_const_size
+ = riscv_stack_align (remaining_size.coeffs[0])
+ - riscv_stack_align (remaining_size.coeffs[1]);
else
- frame_total_constant_size = frame->total_size.to_constant ();
+ remaining_const_size = remaining_size.to_constant ();
- if (SMALL_OPERAND (frame_total_constant_size))
- return frame_total_constant_size;
+ if (SMALL_OPERAND (remaining_const_size))
+ return remaining_const_size;
HOST_WIDE_INT min_first_step =
- RISCV_STACK_ALIGN ((frame->total_size - frame->frame_pointer_offset).to_constant());
+ RISCV_STACK_ALIGN ((remaining_size - frame->frame_pointer_offset).to_constant());
HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8;
- HOST_WIDE_INT min_second_step = frame_total_constant_size - max_first_step;
+ HOST_WIDE_INT min_second_step = remaining_const_size - max_first_step;
gcc_assert (min_first_step <= max_first_step);
/* As an optimization, use the least-significant bits of the total frame
size, so that the second adjustment step is just LUI + ADD. */
if (!SMALL_OPERAND (min_second_step)
- && frame_total_constant_size % IMM_REACH < IMM_REACH / 2
- && frame_total_constant_size % IMM_REACH >= min_first_step)
- return frame_total_constant_size % IMM_REACH;
+ && remaining_const_size % IMM_REACH < IMM_REACH / 2
+ && remaining_const_size % IMM_REACH >= min_first_step)
+ return remaining_const_size % IMM_REACH;
if (TARGET_RVC)
{
@@ -5037,9 +5037,7 @@ riscv_expand_prologue (void)
/* Save the registers. */
if ((frame->mask | frame->fmask) != 0)
{
- HOST_WIDE_INT step1 = riscv_first_stack_step (frame);
- if (size.is_constant ())
- step1 = MIN (size.to_constant(), step1);
+ HOST_WIDE_INT step1 = riscv_first_stack_step (frame, size);
insn = gen_add3_insn (stack_pointer_rtx,
stack_pointer_rtx,
@@ -5142,6 +5140,8 @@ riscv_expand_epilogue (int style)
HOST_WIDE_INT step2 = 0;
bool use_restore_libcall = ((style == NORMAL_RETURN)
&& riscv_use_save_libcall (frame));
+ unsigned libcall_size = use_restore_libcall ?
+ frame->save_libcall_adjustment : 0;
rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
rtx insn;
@@ -5212,13 +5212,18 @@ riscv_expand_epilogue (int style)
REG_NOTES (insn) = dwarf;
}
+ if (use_restore_libcall)
+ frame->mask = 0; /* Temporarily fib for GPRs. */
+
/* If we need to restore registers, deallocate as much stack as
possible in the second step without going out of range. */
if ((frame->mask | frame->fmask) != 0)
- {
- step2 = riscv_first_stack_step (frame);
- step1 -= step2;
- }
+ step2 = riscv_first_stack_step (frame, frame->total_size - libcall_size);
+
+ if (use_restore_libcall)
+ frame->mask = mask; /* Undo the above fib. */
+
+ step1 -= step2 + libcall_size;
/* Set TARGET to BASE + STEP1. */
if (known_gt (step1, 0))
@@ -5272,15 +5277,12 @@ riscv_expand_epilogue (int style)
frame->mask = 0; /* Temporarily fib that we need not save GPRs. */
/* Restore the registers. */
- riscv_for_each_saved_reg (frame->total_size - step2, riscv_restore_reg,
+ riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size,
+ riscv_restore_reg,
true, style == EXCEPTION_RETURN);
if (use_restore_libcall)
- {
frame->mask = mask; /* Undo the above fib. */
- gcc_assert (step2 >= frame->save_libcall_adjustment);
- step2 -= frame->save_libcall_adjustment;
- }
if (need_barrier_p)
riscv_emit_stack_tie ();
new file mode 100644
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32imafc -mabi=ilp32f -msave-restore -O2 -fno-schedule-insns -fno-schedule-insns2 -fno-unroll-loops -fno-peel-loops" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+char my_getchar();
+float getf();
+
+/*
+**bar:
+** call t0,__riscv_save_4
+** addi sp,sp,-2032
+** ...
+** li t0,-12288
+** add sp,sp,t0
+** ...
+** li t0,12288
+** add sp,sp,t0
+** ...
+** addi sp,sp,2032
+** tail __riscv_restore_4
+*/
+int bar()
+{
+ float volatile farray[3568];
+
+ float sum = 0;
+ float f1 = getf();
+ float f2 = getf();
+ float f3 = getf();
+ float f4 = getf();
+
+ for (int i = 0; i < 3568; i++)
+ {
+ farray[i] = my_getchar() * 1.2;
+ sum += farray[i];
+ }
+
+ return sum + f1 + f2 + f3 + f4;
+}
+