[3/4,RISC-V] resolve confilct between zcmp multi push/pop and shrink-wrap-separate
Checks
Commit Message
Disable zcmp multi push/pop if shrink-wrap-separate is active.
So in -Os that prefers smaller code size, by default shrink-wrap-separate
is disabled while zcmp multi push/pop is enabled.
And in -O2 and others that prefers speed, by default shrink-wrap-separate
is enabled while zcmp multi push/pop is disabled. To force enabling zcmp multi
push/pop in this case, -fno-shrink-wrap-separate has to be explictly given.
The following TC shows the issues in -O2 before this patch with both
shrink-wrap-separate and zcmp multi push/pop active.
1. duplicated store of s regs.
2. cm.push pushes ra, s0-s11 in reverse order than what normal
prologue does, causing stack corruption and failure to resotre s regs.
TC: zcmp_shrink_wrap_separate.c included in this patch.
output asm before this patch:
calc_func:
cm.push {ra, s0-s3}, -32
...
beq a5,zero,.L2
...
.L2:
...
sw s1,20(sp) //issue here
sw s3,12(sp) //issue here
...
sw s2,16(sp) //issue here
output asm after this patch:
calc_func:
addi sp,sp,-32
sw s0,24(sp)
...
beq a5,zero,.L2
...
.L2:
...
sw s1,20(sp)
sw s3,12(sp)
...
sw s2,16(sp)
gcc/ChangeLog:
* config/riscv/riscv.cc
(riscv_avoid_shrink_wrapping_separate): wrap the condition check in
riscv_avoid_shrink_wrapping_separate.
(riscv_avoid_multi_push): avoid multi push if shrink_wrapping_separate
is active.
(riscv_get_separate_components): call riscv_avoid_shrink_wrapping_separate
* shrink-wrap.cc (try_shrink_wrapping_separate): call
use_shrink_wrapping_separate.
(use_shrink_wrapping_separate):wrap the condition
check in use_shrink_wrapping_separate
* shrink-wrap.h (use_shrink_wrapping_separate): add to extern
gcc/testsuite/ChangeLog:
* gcc.target/riscv/zcmp_shrink_wrap_separate.c: New test.
* gcc.target/riscv/zcmp_shrink_wrap_separate2.c: New test.
Signed-off-by: Fei Gao <gaofei@eswincomputing.com>
Co-Authored-By: Zhangjin Liao <liaozhangjin@eswincomputing.com>
---
gcc/config/riscv/riscv.cc | 19 +++-
gcc/shrink-wrap.cc | 25 +++--
gcc/shrink-wrap.h | 1 +
.../riscv/zcmp_shrink_wrap_separate.c | 97 +++++++++++++++++++
.../riscv/zcmp_shrink_wrap_separate2.c | 97 +++++++++++++++++++
5 files changed, 228 insertions(+), 11 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate.c
create mode 100644 gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate2.c
Comments
I would suggest breaking this patch into two parts: RISC-V part and
the rest part (shrink-wrap.h / shrink-wrap.cc).
On Wed, Jun 7, 2023 at 1:55 PM Fei Gao <gaofei@eswincomputing.com> wrote:
>
> Disable zcmp multi push/pop if shrink-wrap-separate is active.
>
> So in -Os that prefers smaller code size, by default shrink-wrap-separate
> is disabled while zcmp multi push/pop is enabled.
>
> And in -O2 and others that prefers speed, by default shrink-wrap-separate
> is enabled while zcmp multi push/pop is disabled. To force enabling zcmp multi
> push/pop in this case, -fno-shrink-wrap-separate has to be explictly given.
>
> The following TC shows the issues in -O2 before this patch with both
> shrink-wrap-separate and zcmp multi push/pop active.
> 1. duplicated store of s regs.
> 2. cm.push pushes ra, s0-s11 in reverse order than what normal
> prologue does, causing stack corruption and failure to resotre s regs.
>
> TC: zcmp_shrink_wrap_separate.c included in this patch.
>
> output asm before this patch:
> calc_func:
> cm.push {ra, s0-s3}, -32
> ...
> beq a5,zero,.L2
> ...
> .L2:
> ...
> sw s1,20(sp) //issue here
> sw s3,12(sp) //issue here
> ...
> sw s2,16(sp) //issue here
>
> output asm after this patch:
> calc_func:
> addi sp,sp,-32
> sw s0,24(sp)
> ...
> beq a5,zero,.L2
> ...
> .L2:
> ...
> sw s1,20(sp)
> sw s3,12(sp)
> ...
> sw s2,16(sp)
> gcc/ChangeLog:
>
> * config/riscv/riscv.cc
> (riscv_avoid_shrink_wrapping_separate): wrap the condition check in
> riscv_avoid_shrink_wrapping_separate.
> (riscv_avoid_multi_push): avoid multi push if shrink_wrapping_separate
> is active.
> (riscv_get_separate_components): call riscv_avoid_shrink_wrapping_separate
> * shrink-wrap.cc (try_shrink_wrapping_separate): call
> use_shrink_wrapping_separate.
> (use_shrink_wrapping_separate):wrap the condition
> check in use_shrink_wrapping_separate
> * shrink-wrap.h (use_shrink_wrapping_separate): add to extern
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/zcmp_shrink_wrap_separate.c: New test.
> * gcc.target/riscv/zcmp_shrink_wrap_separate2.c: New test.
>
> Signed-off-by: Fei Gao <gaofei@eswincomputing.com>
> Co-Authored-By: Zhangjin Liao <liaozhangjin@eswincomputing.com>
> ---
> gcc/config/riscv/riscv.cc | 19 +++-
> gcc/shrink-wrap.cc | 25 +++--
> gcc/shrink-wrap.h | 1 +
> .../riscv/zcmp_shrink_wrap_separate.c | 97 +++++++++++++++++++
> .../riscv/zcmp_shrink_wrap_separate2.c | 97 +++++++++++++++++++
> 5 files changed, 228 insertions(+), 11 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate2.c
>
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index f60c241a526..b505cdeca34 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -64,6 +64,7 @@ along with GCC; see the file COPYING3. If not see
> #include "cfghooks.h"
> #include "cfgloop.h"
> #include "cfgrtl.h"
> +#include "shrink-wrap.h"
> #include "sel-sched.h"
> #include "fold-const.h"
> #include "gimple-iterator.h"
> @@ -389,6 +390,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
> false, /* use_divmod_expansion */
> };
>
> +static bool riscv_avoid_shrink_wrapping_separate ();
> static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
> static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *);
>
> @@ -4910,6 +4912,8 @@ riscv_avoid_multi_push(const struct riscv_frame_info *frame)
> || cfun->machine->interrupt_handler_p
> || cfun->machine->varargs_size != 0
> || crtl->args.pretend_args_size != 0
> + || (use_shrink_wrapping_separate ()
> + && !riscv_avoid_shrink_wrapping_separate ())
> || (frame->mask & ~ MULTI_PUSH_GPR_MASK))
> return true;
>
> @@ -6077,6 +6081,17 @@ riscv_epilogue_uses (unsigned int regno)
> return false;
> }
>
> +static bool
> +riscv_avoid_shrink_wrapping_separate ()
> +{
> + if (riscv_use_save_libcall (&cfun->machine->frame)
> + || cfun->machine->interrupt_handler_p
> + || !cfun->machine->frame.gp_sp_offset.is_constant ())
> + return true;
> +
> + return false;
> +}
> +
> /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
>
> static sbitmap
> @@ -6086,9 +6101,7 @@ riscv_get_separate_components (void)
> sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
> bitmap_clear (components);
>
> - if (riscv_use_save_libcall (&cfun->machine->frame)
> - || cfun->machine->interrupt_handler_p
> - || !cfun->machine->frame.gp_sp_offset.is_constant ())
> + if (riscv_avoid_shrink_wrapping_separate ())
> return components;
>
> offset = cfun->machine->frame.gp_sp_offset.to_constant ();
> diff --git a/gcc/shrink-wrap.cc b/gcc/shrink-wrap.cc
> index b8d7b557130..d534964321a 100644
> --- a/gcc/shrink-wrap.cc
> +++ b/gcc/shrink-wrap.cc
> @@ -1776,16 +1776,14 @@ insert_prologue_epilogue_for_components (sbitmap components)
> commit_edge_insertions ();
> }
>
> -/* The main entry point to this subpass. FIRST_BB is where the prologue
> - would be normally put. */
> -void
> -try_shrink_wrapping_separate (basic_block first_bb)
> +bool
> +use_shrink_wrapping_separate (void)
> {
> if (!(SHRINK_WRAPPING_ENABLED
> - && flag_shrink_wrap_separate
> - && optimize_function_for_speed_p (cfun)
> - && targetm.shrink_wrap.get_separate_components))
> - return;
> + && flag_shrink_wrap_separate
> + && optimize_function_for_speed_p (cfun)
> + && targetm.shrink_wrap.get_separate_components))
> + return false;
>
> /* We don't handle "strange" functions. */
> if (cfun->calls_alloca
> @@ -1794,6 +1792,17 @@ try_shrink_wrapping_separate (basic_block first_bb)
> || crtl->calls_eh_return
> || crtl->has_nonlocal_goto
> || crtl->saves_all_registers)
> + return false;
> +
> + return true;
> +}
> +
> +/* The main entry point to this subpass. FIRST_BB is where the prologue
> + would be normally put. */
> +void
> +try_shrink_wrapping_separate (basic_block first_bb)
> +{
> + if (!use_shrink_wrapping_separate ())
> return;
>
> /* Ask the target what components there are. If it returns NULL, don't
> diff --git a/gcc/shrink-wrap.h b/gcc/shrink-wrap.h
> index 161647711a3..82386c2b712 100644
> --- a/gcc/shrink-wrap.h
> +++ b/gcc/shrink-wrap.h
> @@ -26,6 +26,7 @@ along with GCC; see the file COPYING3. If not see
> extern bool requires_stack_frame_p (rtx_insn *, HARD_REG_SET, HARD_REG_SET);
> extern void try_shrink_wrapping (edge *entry_edge, rtx_insn *prologue_seq);
> extern void try_shrink_wrapping_separate (basic_block first_bb);
> +extern bool use_shrink_wrapping_separate (void);
> #define SHRINK_WRAPPING_ENABLED \
> (flag_shrink_wrap && targetm.have_simple_return ())
>
> diff --git a/gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate.c b/gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate.c
> new file mode 100644
> index 00000000000..11f87aee607
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate.c
> @@ -0,0 +1,97 @@
> +/* { dg-do compile } */
> +/* { dg-options " -O2 -march=rv32imaf_zca_zcmp -mabi=ilp32f" } */
> +/* { dg-skip-if "" { *-*-* } {"-O0" "-O1" "-Os" "-Og" "-O3" "-Oz" "-flto"} } */
> +
> +typedef struct MAT_PARAMS_S
> +{
> + int N;
> + signed short *A;
> + signed short *B;
> + signed int *C;
> +} mat_params;
> +
> +typedef struct CORE_PORTABLE_S
> +{
> + unsigned char portable_id;
> +} core_portable;
> +
> +typedef struct RESULTS_S
> +{
> + /* inputs */
> + signed short seed1; /* Initializing seed */
> + signed short seed2; /* Initializing seed */
> + signed short seed3; /* Initializing seed */
> + void * memblock[4]; /* Pointer to safe memory location */
> + unsigned int size; /* Size of the data */
> + unsigned int iterations; /* Number of iterations to execute */
> + unsigned int execs; /* Bitmask of operations to execute */
> + struct list_head_s *list;
> + mat_params mat;
> + /* outputs */
> + unsigned short crc;
> + unsigned short crclist;
> + unsigned short crcmatrix;
> + unsigned short crcstate;
> + signed short err;
> + /* ultithread specific */
> + core_portable port;
> +} core_results;
> +
> +extern signed short
> +core_bench_state(unsigned int, void *, signed short, signed short, signed short, unsigned short);
> +
> +extern signed short
> +core_bench_matrix(mat_params *, signed short, unsigned short);
> +
> +extern unsigned short
> +crcu16(signed short, unsigned short);
> +
> +signed short
> +calc_func(signed short *pdata, core_results *res)
> +{
> + signed short data = *pdata;
> + signed short retval;
> + unsigned char optype
> + = (data >> 7)
> + & 1; /* bit 7 indicates if the function result has been cached */
> + if (optype) /* if cached, use cache */
> + return (data & 0x007f);
> + else
> + { /* otherwise calculate and cache the result */
> + signed short flag = data & 0x7; /* bits 0-2 is type of function to perform */
> + signed short dtype
> + = ((data >> 3)
> + & 0xf); /* bits 3-6 is specific data for the operation */
> + dtype |= dtype << 4; /* replicate the lower 4 bits to get an 8b value */
> + switch (flag)
> + {
> + case 0:
> + if (dtype < 0x22) /* set min period for bit corruption */
> + dtype = 0x22;
> + retval = core_bench_state(res->size,
> + res->memblock[3],
> + res->seed1,
> + res->seed2,
> + dtype,
> + res->crc);
> + if (res->crcstate == 0)
> + res->crcstate = retval;
> + break;
> + case 1:
> + retval = core_bench_matrix(&(res->mat), dtype, res->crc);
> + if (res->crcmatrix == 0)
> + res->crcmatrix = retval;
> + break;
> + default:
> + retval = data;
> + break;
> + }
> + res->crc = crcu16(retval, res->crc);
> + retval &= 0x007f;
> + *pdata = (data & 0xff00) | 0x0080 | retval; /* cache the result */
> + return retval;
> + }
> +}
> +
> +/* { dg-final { scan-assembler-not "cm\.push" } } */
> +
> diff --git a/gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate2.c b/gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate2.c
> new file mode 100644
> index 00000000000..ec7e9c39b5d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate2.c
> @@ -0,0 +1,97 @@
> +/* { dg-do compile } */
> +/* { dg-options " -O2 -fno-shrink-wrap-separate -march=rv32imaf_zca_zcmp -mabi=ilp32f" } */
> +/* { dg-skip-if "" { *-*-* } {"-O0" "-O1" "-Os" "-Og" "-O3" "-Oz" "-flto"} } */
> +
> +typedef struct MAT_PARAMS_S
> +{
> + int N;
> + signed short *A;
> + signed short *B;
> + signed int *C;
> +} mat_params;
> +
> +typedef struct CORE_PORTABLE_S
> +{
> + unsigned char portable_id;
> +} core_portable;
> +
> +typedef struct RESULTS_S
> +{
> + /* inputs */
> + signed short seed1; /* Initializing seed */
> + signed short seed2; /* Initializing seed */
> + signed short seed3; /* Initializing seed */
> + void * memblock[4]; /* Pointer to safe memory location */
> + unsigned int size; /* Size of the data */
> + unsigned int iterations; /* Number of iterations to execute */
> + unsigned int execs; /* Bitmask of operations to execute */
> + struct list_head_s *list;
> + mat_params mat;
> + /* outputs */
> + unsigned short crc;
> + unsigned short crclist;
> + unsigned short crcmatrix;
> + unsigned short crcstate;
> + signed short err;
> + /* ultithread specific */
> + core_portable port;
> +} core_results;
> +
> +extern signed short
> +core_bench_state(unsigned int, void *, signed short, signed short, signed short, unsigned short);
> +
> +extern signed short
> +core_bench_matrix(mat_params *, signed short, unsigned short);
> +
> +extern unsigned short
> +crcu16(signed short, unsigned short);
> +
> +signed short
> +calc_func(signed short *pdata, core_results *res)
> +{
> + signed short data = *pdata;
> + signed short retval;
> + unsigned char optype
> + = (data >> 7)
> + & 1; /* bit 7 indicates if the function result has been cached */
> + if (optype) /* if cached, use cache */
> + return (data & 0x007f);
> + else
> + { /* otherwise calculate and cache the result */
> + signed short flag = data & 0x7; /* bits 0-2 is type of function to perform */
> + signed short dtype
> + = ((data >> 3)
> + & 0xf); /* bits 3-6 is specific data for the operation */
> + dtype |= dtype << 4; /* replicate the lower 4 bits to get an 8b value */
> + switch (flag)
> + {
> + case 0:
> + if (dtype < 0x22) /* set min period for bit corruption */
> + dtype = 0x22;
> + retval = core_bench_state(res->size,
> + res->memblock[3],
> + res->seed1,
> + res->seed2,
> + dtype,
> + res->crc);
> + if (res->crcstate == 0)
> + res->crcstate = retval;
> + break;
> + case 1:
> + retval = core_bench_matrix(&(res->mat), dtype, res->crc);
> + if (res->crcmatrix == 0)
> + res->crcmatrix = retval;
> + break;
> + default:
> + retval = data;
> + break;
> + }
> + res->crc = crcu16(retval, res->crc);
> + retval &= 0x007f;
> + *pdata = (data & 0xff00) | 0x0080 | retval; /* cache the result */
> + return retval;
> + }
> +}
> +
> +/* { dg-final { scan-assembler "cm\.push" } } */
> +
> --
> 2.17.1
>
On 6/6/23 23:52, Fei Gao wrote:
> Disable zcmp multi push/pop if shrink-wrap-separate is active.
>
> So in -Os that prefers smaller code size, by default shrink-wrap-separate
> is disabled while zcmp multi push/pop is enabled.
>
> And in -O2 and others that prefers speed, by default shrink-wrap-separate
> is enabled while zcmp multi push/pop is disabled. To force enabling zcmp multi
> push/pop in this case, -fno-shrink-wrap-separate has to be explictly given.
>
> The following TC shows the issues in -O2 before this patch with both
> shrink-wrap-separate and zcmp multi push/pop active.
> 1. duplicated store of s regs.
> 2. cm.push pushes ra, s0-s11 in reverse order than what normal
> prologue does, causing stack corruption and failure to resotre s regs.
>
> TC: zcmp_shrink_wrap_separate.c included in this patch.
>
> output asm before this patch:
> calc_func:
> cm.push {ra, s0-s3}, -32
> ...
> beq a5,zero,.L2
> ...
> .L2:
> ...
> sw s1,20(sp) //issue here
> sw s3,12(sp) //issue here
> ...
> sw s2,16(sp) //issue here
>
> output asm after this patch:
> calc_func:
> addi sp,sp,-32
> sw s0,24(sp)
> ...
> beq a5,zero,.L2
> ...
> .L2:
> ...
> sw s1,20(sp)
> sw s3,12(sp)
> ...
> sw s2,16(sp)
> gcc/ChangeLog:
>
> * config/riscv/riscv.cc
> (riscv_avoid_shrink_wrapping_separate): wrap the condition check in
> riscv_avoid_shrink_wrapping_separate.
> (riscv_avoid_multi_push): avoid multi push if shrink_wrapping_separate
> is active.
> (riscv_get_separate_components): call riscv_avoid_shrink_wrapping_separate
> * shrink-wrap.cc (try_shrink_wrapping_separate): call
> use_shrink_wrapping_separate.
> (use_shrink_wrapping_separate):wrap the condition
> check in use_shrink_wrapping_separate
> * shrink-wrap.h (use_shrink_wrapping_separate): add to extern
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/zcmp_shrink_wrap_separate.c: New test.
> * gcc.target/riscv/zcmp_shrink_wrap_separate2.c: New test.
I know Kito asked for this to be broken up into target dependent vs
target independent changes, that's a good ask.
Can't we utilize the get_separate_components hook to accomplish what
you're trying to do? ie, put the logic to avoid shrink wrapping for
this case within the existing risc-v hook?
jeff
On 2023-06-13 03:26 Jeff Law <jeffreyalaw@gmail.com> wrote:
>
>
>
>On 6/6/23 23:52, Fei Gao wrote:
>> Disable zcmp multi push/pop if shrink-wrap-separate is active.
>>
>> So in -Os that prefers smaller code size, by default shrink-wrap-separate
>> is disabled while zcmp multi push/pop is enabled.
>>
>> And in -O2 and others that prefers speed, by default shrink-wrap-separate
>> is enabled while zcmp multi push/pop is disabled. To force enabling zcmp multi
>> push/pop in this case, -fno-shrink-wrap-separate has to be explictly given.
>>
>> The following TC shows the issues in -O2 before this patch with both
>> shrink-wrap-separate and zcmp multi push/pop active.
>> 1. duplicated store of s regs.
>> 2. cm.push pushes ra, s0-s11 in reverse order than what normal
>> prologue does, causing stack corruption and failure to resotre s regs.
>>
>> TC: zcmp_shrink_wrap_separate.c included in this patch.
>>
>> output asm before this patch:
>> calc_func:
>> cm.push {ra, s0-s3}, -32
>> ...
>> beq a5,zero,.L2
>> ...
>> .L2:
>> ...
>> sw s1,20(sp) //issue here
>> sw s3,12(sp) //issue here
>> ...
>> sw s2,16(sp) //issue here
>>
>> output asm after this patch:
>> calc_func:
>> addi sp,sp,-32
>> sw s0,24(sp)
>> ...
>> beq a5,zero,.L2
>> ...
>> .L2:
>> ...
>> sw s1,20(sp)
>> sw s3,12(sp)
>> ...
>> sw s2,16(sp)
>> gcc/ChangeLog:
>>
>> * config/riscv/riscv.cc
>> (riscv_avoid_shrink_wrapping_separate): wrap the condition check in
>> riscv_avoid_shrink_wrapping_separate.
>> (riscv_avoid_multi_push): avoid multi push if shrink_wrapping_separate
>> is active.
>> (riscv_get_separate_components): call riscv_avoid_shrink_wrapping_separate
>> * shrink-wrap.cc (try_shrink_wrapping_separate): call
>> use_shrink_wrapping_separate.
>> (use_shrink_wrapping_separate):wrap the condition
>> check in use_shrink_wrapping_separate
>> * shrink-wrap.h (use_shrink_wrapping_separate): add to extern
>>
>> gcc/testsuite/ChangeLog:
>>
>> * gcc.target/riscv/zcmp_shrink_wrap_separate.c: New test.
>> * gcc.target/riscv/zcmp_shrink_wrap_separate2.c: New test.
>I know Kito asked for this to be broken up into target dependent vs
>target independent changes, that's a good ask.
>
>Can't we utilize the get_separate_components hook to accomplish what
>you're trying to do? ie, put the logic to avoid shrink wrapping for
>this case within the existing risc-v hook?
Thank Jeff and Kito for your comments.
My first try was to avoid shrink wrapping if zcmp is enabled.
But after discussion with Kito and Andrew Pinski, I realized it's better to disable
zcmp push and pops if shrink wrapping is active.
For detailed discussion, please check link below.
thread: [PATCH 1/2] [RISC-V] disable shrink-wrap-separate if zcmp enabled.
link: https://www.mail-archive.com/gcc-patches@gcc.gnu.org/msg307203.html
I will go ahead with Kito's advice if you're fine with the current solution.
Thanks.
BR,
Fei
>
>jeff
@@ -64,6 +64,7 @@ along with GCC; see the file COPYING3. If not see
#include "cfghooks.h"
#include "cfgloop.h"
#include "cfgrtl.h"
+#include "shrink-wrap.h"
#include "sel-sched.h"
#include "fold-const.h"
#include "gimple-iterator.h"
@@ -389,6 +390,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
false, /* use_divmod_expansion */
};
+static bool riscv_avoid_shrink_wrapping_separate ();
static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *);
@@ -4910,6 +4912,8 @@ riscv_avoid_multi_push(const struct riscv_frame_info *frame)
|| cfun->machine->interrupt_handler_p
|| cfun->machine->varargs_size != 0
|| crtl->args.pretend_args_size != 0
+ || (use_shrink_wrapping_separate ()
+ && !riscv_avoid_shrink_wrapping_separate ())
|| (frame->mask & ~ MULTI_PUSH_GPR_MASK))
return true;
@@ -6077,6 +6081,17 @@ riscv_epilogue_uses (unsigned int regno)
return false;
}
+static bool
+riscv_avoid_shrink_wrapping_separate ()
+{
+ if (riscv_use_save_libcall (&cfun->machine->frame)
+ || cfun->machine->interrupt_handler_p
+ || !cfun->machine->frame.gp_sp_offset.is_constant ())
+ return true;
+
+ return false;
+}
+
/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
static sbitmap
@@ -6086,9 +6101,7 @@ riscv_get_separate_components (void)
sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
bitmap_clear (components);
- if (riscv_use_save_libcall (&cfun->machine->frame)
- || cfun->machine->interrupt_handler_p
- || !cfun->machine->frame.gp_sp_offset.is_constant ())
+ if (riscv_avoid_shrink_wrapping_separate ())
return components;
offset = cfun->machine->frame.gp_sp_offset.to_constant ();
@@ -1776,16 +1776,14 @@ insert_prologue_epilogue_for_components (sbitmap components)
commit_edge_insertions ();
}
-/* The main entry point to this subpass. FIRST_BB is where the prologue
- would be normally put. */
-void
-try_shrink_wrapping_separate (basic_block first_bb)
+bool
+use_shrink_wrapping_separate (void)
{
if (!(SHRINK_WRAPPING_ENABLED
- && flag_shrink_wrap_separate
- && optimize_function_for_speed_p (cfun)
- && targetm.shrink_wrap.get_separate_components))
- return;
+ && flag_shrink_wrap_separate
+ && optimize_function_for_speed_p (cfun)
+ && targetm.shrink_wrap.get_separate_components))
+ return false;
/* We don't handle "strange" functions. */
if (cfun->calls_alloca
@@ -1794,6 +1792,17 @@ try_shrink_wrapping_separate (basic_block first_bb)
|| crtl->calls_eh_return
|| crtl->has_nonlocal_goto
|| crtl->saves_all_registers)
+ return false;
+
+ return true;
+}
+
+/* The main entry point to this subpass. FIRST_BB is where the prologue
+ would be normally put. */
+void
+try_shrink_wrapping_separate (basic_block first_bb)
+{
+ if (!use_shrink_wrapping_separate ())
return;
/* Ask the target what components there are. If it returns NULL, don't
@@ -26,6 +26,7 @@ along with GCC; see the file COPYING3. If not see
extern bool requires_stack_frame_p (rtx_insn *, HARD_REG_SET, HARD_REG_SET);
extern void try_shrink_wrapping (edge *entry_edge, rtx_insn *prologue_seq);
extern void try_shrink_wrapping_separate (basic_block first_bb);
+extern bool use_shrink_wrapping_separate (void);
#define SHRINK_WRAPPING_ENABLED \
(flag_shrink_wrap && targetm.have_simple_return ())
new file mode 100644
@@ -0,0 +1,97 @@
+/* { dg-do compile } */
+/* { dg-options " -O2 -march=rv32imaf_zca_zcmp -mabi=ilp32f" } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-O1" "-Os" "-Og" "-O3" "-Oz" "-flto"} } */
+
+typedef struct MAT_PARAMS_S
+{
+ int N;
+ signed short *A;
+ signed short *B;
+ signed int *C;
+} mat_params;
+
+typedef struct CORE_PORTABLE_S
+{
+ unsigned char portable_id;
+} core_portable;
+
+typedef struct RESULTS_S
+{
+ /* inputs */
+ signed short seed1; /* Initializing seed */
+ signed short seed2; /* Initializing seed */
+ signed short seed3; /* Initializing seed */
+ void * memblock[4]; /* Pointer to safe memory location */
+ unsigned int size; /* Size of the data */
+ unsigned int iterations; /* Number of iterations to execute */
+ unsigned int execs; /* Bitmask of operations to execute */
+ struct list_head_s *list;
+ mat_params mat;
+ /* outputs */
+ unsigned short crc;
+ unsigned short crclist;
+ unsigned short crcmatrix;
+ unsigned short crcstate;
+ signed short err;
+ /* ultithread specific */
+ core_portable port;
+} core_results;
+
+extern signed short
+core_bench_state(unsigned int, void *, signed short, signed short, signed short, unsigned short);
+
+extern signed short
+core_bench_matrix(mat_params *, signed short, unsigned short);
+
+extern unsigned short
+crcu16(signed short, unsigned short);
+
+signed short
+calc_func(signed short *pdata, core_results *res)
+{
+ signed short data = *pdata;
+ signed short retval;
+ unsigned char optype
+ = (data >> 7)
+ & 1; /* bit 7 indicates if the function result has been cached */
+ if (optype) /* if cached, use cache */
+ return (data & 0x007f);
+ else
+ { /* otherwise calculate and cache the result */
+ signed short flag = data & 0x7; /* bits 0-2 is type of function to perform */
+ signed short dtype
+ = ((data >> 3)
+ & 0xf); /* bits 3-6 is specific data for the operation */
+ dtype |= dtype << 4; /* replicate the lower 4 bits to get an 8b value */
+ switch (flag)
+ {
+ case 0:
+ if (dtype < 0x22) /* set min period for bit corruption */
+ dtype = 0x22;
+ retval = core_bench_state(res->size,
+ res->memblock[3],
+ res->seed1,
+ res->seed2,
+ dtype,
+ res->crc);
+ if (res->crcstate == 0)
+ res->crcstate = retval;
+ break;
+ case 1:
+ retval = core_bench_matrix(&(res->mat), dtype, res->crc);
+ if (res->crcmatrix == 0)
+ res->crcmatrix = retval;
+ break;
+ default:
+ retval = data;
+ break;
+ }
+ res->crc = crcu16(retval, res->crc);
+ retval &= 0x007f;
+ *pdata = (data & 0xff00) | 0x0080 | retval; /* cache the result */
+ return retval;
+ }
+}
+
+/* { dg-final { scan-assembler-not "cm\.push" } } */
+
new file mode 100644
@@ -0,0 +1,97 @@
+/* { dg-do compile } */
+/* { dg-options " -O2 -fno-shrink-wrap-separate -march=rv32imaf_zca_zcmp -mabi=ilp32f" } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-O1" "-Os" "-Og" "-O3" "-Oz" "-flto"} } */
+
+typedef struct MAT_PARAMS_S
+{
+ int N;
+ signed short *A;
+ signed short *B;
+ signed int *C;
+} mat_params;
+
+typedef struct CORE_PORTABLE_S
+{
+ unsigned char portable_id;
+} core_portable;
+
+typedef struct RESULTS_S
+{
+ /* inputs */
+ signed short seed1; /* Initializing seed */
+ signed short seed2; /* Initializing seed */
+ signed short seed3; /* Initializing seed */
+ void * memblock[4]; /* Pointer to safe memory location */
+ unsigned int size; /* Size of the data */
+ unsigned int iterations; /* Number of iterations to execute */
+ unsigned int execs; /* Bitmask of operations to execute */
+ struct list_head_s *list;
+ mat_params mat;
+ /* outputs */
+ unsigned short crc;
+ unsigned short crclist;
+ unsigned short crcmatrix;
+ unsigned short crcstate;
+ signed short err;
+ /* ultithread specific */
+ core_portable port;
+} core_results;
+
+extern signed short
+core_bench_state(unsigned int, void *, signed short, signed short, signed short, unsigned short);
+
+extern signed short
+core_bench_matrix(mat_params *, signed short, unsigned short);
+
+extern unsigned short
+crcu16(signed short, unsigned short);
+
+signed short
+calc_func(signed short *pdata, core_results *res)
+{
+ signed short data = *pdata;
+ signed short retval;
+ unsigned char optype
+ = (data >> 7)
+ & 1; /* bit 7 indicates if the function result has been cached */
+ if (optype) /* if cached, use cache */
+ return (data & 0x007f);
+ else
+ { /* otherwise calculate and cache the result */
+ signed short flag = data & 0x7; /* bits 0-2 is type of function to perform */
+ signed short dtype
+ = ((data >> 3)
+ & 0xf); /* bits 3-6 is specific data for the operation */
+ dtype |= dtype << 4; /* replicate the lower 4 bits to get an 8b value */
+ switch (flag)
+ {
+ case 0:
+ if (dtype < 0x22) /* set min period for bit corruption */
+ dtype = 0x22;
+ retval = core_bench_state(res->size,
+ res->memblock[3],
+ res->seed1,
+ res->seed2,
+ dtype,
+ res->crc);
+ if (res->crcstate == 0)
+ res->crcstate = retval;
+ break;
+ case 1:
+ retval = core_bench_matrix(&(res->mat), dtype, res->crc);
+ if (res->crcmatrix == 0)
+ res->crcmatrix = retval;
+ break;
+ default:
+ retval = data;
+ break;
+ }
+ res->crc = crcu16(retval, res->crc);
+ retval &= 0x007f;
+ *pdata = (data & 0xff00) | 0x0080 | retval; /* cache the result */
+ return retval;
+ }
+}
+
+/* { dg-final { scan-assembler "cm\.push" } } */
+