[v2,2/2] x86: Don't save callee-saved registers in noreturn functions
Checks
Commit Message
There is no need to save callee-saved registers in noreturn functions
if they are nothrow or are compiled with -fno-exceptions. We can treat
them the same as functions with the no_callee_saved_registers attribute.
Adjust stack-check-17.c for a noreturn function, which no longer saves
any registers.
With this change, __libc_start_main in glibc 2.39, which is a noreturn
function, is changed from
__libc_start_main:
endbr64
push %r15
push %r14
mov %rcx,%r14
push %r13
push %r12
push %rbp
mov %esi,%ebp
push %rbx
mov %rdx,%rbx
sub $0x28,%rsp
mov %rdi,(%rsp)
mov %fs:0x28,%rax
mov %rax,0x18(%rsp)
xor %eax,%eax
test %r9,%r9
to
__libc_start_main:
endbr64
sub $0x28,%rsp
mov %esi,%ebp
mov %rdx,%rbx
mov %rcx,%r14
mov %rdi,(%rsp)
mov %fs:0x28,%rax
mov %rax,0x18(%rsp)
xor %eax,%eax
test %r9,%r9
In Linux kernel 6.7.0 on x86-64, do_exit is changed from
do_exit:
endbr64
call <do_exit+0x9>
push %r15
push %r14
push %r13
push %r12
mov %rdi,%r12
push %rbp
push %rbx
mov %gs:0x0,%rbx
sub $0x28,%rsp
mov %gs:0x28,%rax
mov %rax,0x20(%rsp)
xor %eax,%eax
call *0x0(%rip) # <do_exit+0x39>
test $0x2,%ah
je <do_exit+0x8d3>
to
do_exit:
endbr64
call <do_exit+0x9>
sub $0x28,%rsp
mov %rdi,%r12
mov %gs:0x28,%rax
mov %rax,0x20(%rsp)
xor %eax,%eax
mov %gs:0x0,%rbx
call *0x0(%rip) # <do_exit+0x2f>
test $0x2,%ah
je <do_exit+0x8c9>
I compared GCC master branch bootstrap and test times on a slow machine
with 6.6 Linux kernels compiled with the original GCC 13 and the GCC 13
with the backported patch. The performance data isn't precise since the
measurements were done on different days with different GCC sources under
different 6.6 kernel versions.
GCC master branch build time in seconds:
before after improvement
30043.75user 30013.16user 0%
1274.85system 1243.72system 2.4%
GCC master branch test time in seconds (new tests added):
before after improvement
216035.90user 216547.51user 0
27365.51system 26658.54system 2.6%
gcc/
PR target/38534
* config/i386/i386-options.cc (ix86_set_func_type): Don't
save and restore callee saved registers for a noreturn function
with nothrow or compiled with -fno-exceptions.
gcc/testsuite/
PR target/38534
* gcc.target/i386/pr38534-1.c: New file.
* gcc.target/i386/pr38534-2.c: Likewise.
* gcc.target/i386/pr38534-3.c: Likewise.
* gcc.target/i386/pr38534-4.c: Likewise.
* gcc.target/i386/stack-check-17.c: Updated.
---
gcc/config/i386/i386-options.cc | 16 ++++++++++--
gcc/testsuite/gcc.target/i386/pr38534-1.c | 26 +++++++++++++++++++
gcc/testsuite/gcc.target/i386/pr38534-2.c | 18 +++++++++++++
gcc/testsuite/gcc.target/i386/pr38534-3.c | 19 ++++++++++++++
gcc/testsuite/gcc.target/i386/pr38534-4.c | 18 +++++++++++++
.../gcc.target/i386/stack-check-17.c | 19 +++++---------
6 files changed, 102 insertions(+), 14 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-3.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-4.c
Comments
> I compared GCC master branch bootstrap and test times on a slow machine
> with 6.6 Linux kernels compiled with the original GCC 13 and the GCC 13
> with the backported patch. The performance data isn't precise since the
> measurements were done on different days with different GCC sources under
> different 6.6 kernel versions.
>
> GCC master branch build time in seconds:
>
> before after improvement
> 30043.75user 30013.16user 0%
> 1274.85system 1243.72system 2.4%
>
> GCC master branch test time in seconds (new tests added):
>
> before after improvement
> 216035.90user 216547.51user 0
> 27365.51system 26658.54system 2.6%
It is interesting - does the system time difference come from a smaller
binary? Is the difference at all significant?
>
> gcc/
>
> PR target/38534
> * config/i386/i386-options.cc (ix86_set_func_type): Don't
> save and restore callee saved registers for a noreturn function
> with nothrow or compiled with -fno-exceptions.
In general this looks like a good thing to do. I wonder if that is not
something the middle-end should understand for all targets.
Also I wonder about asynchronous stack unwinding. If we want to unwind
the stack from an interrupt then we may need some registers to be saved
(like the base pointer).
Honza
>
> gcc/testsuite/
>
> PR target/38534
> * gcc.target/i386/pr38534-1.c: New file.
> * gcc.target/i386/pr38534-2.c: Likewise.
> * gcc.target/i386/pr38534-3.c: Likewise.
> * gcc.target/i386/pr38534-4.c: Likewise.
> * gcc.target/i386/stack-check-17.c: Updated.
> ---
> gcc/config/i386/i386-options.cc | 16 ++++++++++--
> gcc/testsuite/gcc.target/i386/pr38534-1.c | 26 +++++++++++++++++++
> gcc/testsuite/gcc.target/i386/pr38534-2.c | 18 +++++++++++++
> gcc/testsuite/gcc.target/i386/pr38534-3.c | 19 ++++++++++++++
> gcc/testsuite/gcc.target/i386/pr38534-4.c | 18 +++++++++++++
> .../gcc.target/i386/stack-check-17.c | 19 +++++---------
> 6 files changed, 102 insertions(+), 14 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-1.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-2.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-3.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-4.c
>
> diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
> index 0cdea30599e..f965568947c 100644
> --- a/gcc/config/i386/i386-options.cc
> +++ b/gcc/config/i386/i386-options.cc
> @@ -3371,9 +3371,21 @@ ix86_simd_clone_adjust (struct cgraph_node *node)
> static void
> ix86_set_func_type (tree fndecl)
> {
> + /* No need to save and restore callee-saved registers for a noreturn
> + function with nothrow or compiled with -fno-exceptions.
> +
> + NB: Don't use TREE_THIS_VOLATILE to check if this is a noreturn
> + function. The local-pure-const pass turns an interrupt function
> + into a noreturn function by setting TREE_THIS_VOLATILE. Normally
> + the local-pure-const pass is run after ix86_set_func_type is called.
> + When the local-pure-const pass is enabled for LTO, the interrupt
> + function is marked as noreturn in the IR output, which leads the
> + incompatible attribute error in LTO1. */
> bool has_no_callee_saved_registers
> - = lookup_attribute ("no_callee_saved_registers",
> - TYPE_ATTRIBUTES (TREE_TYPE (fndecl)));
> + = (((TREE_NOTHROW (fndecl) || !flag_exceptions)
> + && lookup_attribute ("noreturn", DECL_ATTRIBUTES (fndecl)))
> + || lookup_attribute ("no_callee_saved_registers",
> + TYPE_ATTRIBUTES (TREE_TYPE (fndecl))));
>
> if (cfun->machine->func_type == TYPE_UNKNOWN)
> {
> diff --git a/gcc/testsuite/gcc.target/i386/pr38534-1.c b/gcc/testsuite/gcc.target/i386/pr38534-1.c
> new file mode 100644
> index 00000000000..9297959e759
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr38534-1.c
> @@ -0,0 +1,26 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move" } */
> +
> +#define ARRAY_SIZE 256
> +
> +extern int array[ARRAY_SIZE][ARRAY_SIZE][ARRAY_SIZE];
> +extern int value (int, int, int)
> +#ifndef __x86_64__
> +__attribute__ ((regparm(3)))
> +#endif
> +;
> +
> +void
> +__attribute__((noreturn))
> +no_return_to_caller (void)
> +{
> + unsigned i, j, k;
> + for (i = ARRAY_SIZE; i > 0; --i)
> + for (j = ARRAY_SIZE; j > 0; --j)
> + for (k = ARRAY_SIZE; k > 0; --k)
> + array[i - 1][j - 1][k - 1] = value (i, j, k);
> + while (1);
> +}
> +
> +/* { dg-final { scan-assembler-not "push" } } */
> +/* { dg-final { scan-assembler-not "pop" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr38534-2.c b/gcc/testsuite/gcc.target/i386/pr38534-2.c
> new file mode 100644
> index 00000000000..1fb01363273
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr38534-2.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move" } */
> +
> +extern void bar (void) __attribute__ ((no_callee_saved_registers));
> +extern void fn (void) __attribute__ ((noreturn));
> +
> +__attribute__ ((noreturn))
> +void
> +foo (void)
> +{
> + bar ();
> + fn ();
> +}
> +
> +/* { dg-final { scan-assembler-not "push" } } */
> +/* { dg-final { scan-assembler-not "pop" } } */
> +/* { dg-final { scan-assembler-not "jmp\[\\t \]+_?bar" } } */
> +/* { dg-final { scan-assembler "call\[\\t \]+_?bar" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr38534-3.c b/gcc/testsuite/gcc.target/i386/pr38534-3.c
> new file mode 100644
> index 00000000000..87fc35f3fe9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr38534-3.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move" } */
> +
> +typedef void (*fn_t) (void) __attribute__ ((no_callee_saved_registers));
> +extern fn_t bar;
> +extern void fn (void) __attribute__ ((noreturn));
> +
> +__attribute__ ((noreturn))
> +void
> +foo (void)
> +{
> + bar ();
> + fn ();
> +}
> +
> +/* { dg-final { scan-assembler-not "push" } } */
> +/* { dg-final { scan-assembler-not "pop" } } */
> +/* { dg-final { scan-assembler-not "jmp" } } */
> +/* { dg-final { scan-assembler "call\[\\t \]+" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr38534-4.c b/gcc/testsuite/gcc.target/i386/pr38534-4.c
> new file mode 100644
> index 00000000000..561ebeef194
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr38534-4.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move" } */
> +
> +typedef void (*fn_t) (void) __attribute__ ((no_callee_saved_registers));
> +extern void fn (void) __attribute__ ((noreturn));
> +
> +__attribute__ ((noreturn))
> +void
> +foo (fn_t bar)
> +{
> + bar ();
> + fn ();
> +}
> +
> +/* { dg-final { scan-assembler-not "push" } } */
> +/* { dg-final { scan-assembler-not "pop" } } */
> +/* { dg-final { scan-assembler-not "jmp" } } */
> +/* { dg-final { scan-assembler "call\[\\t \]+" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/stack-check-17.c b/gcc/testsuite/gcc.target/i386/stack-check-17.c
> index b3e41cb3d25..061484e1319 100644
> --- a/gcc/testsuite/gcc.target/i386/stack-check-17.c
> +++ b/gcc/testsuite/gcc.target/i386/stack-check-17.c
> @@ -23,19 +23,14 @@ f3 (void)
> /* Verify no explicit probes. */
> /* { dg-final { scan-assembler-not "or\[ql\]" } } */
>
> -/* We also want to verify we did not use a push/pop sequence
> - to probe *sp as the callee register saves are sufficient
> - to probe *sp.
> -
> - y0/y1 are live across the call and thus must be allocated
> +/* y0/y1 are live across the call and thus must be allocated
> into either a stack slot or callee saved register. The former
> would be rather dumb. So assume it does not happen.
>
> - So search for two/four pushes for the callee register saves/argument pushes
> - (plus one for the PIC register if needed on ia32) and no pops (since the
> - function has no reachable epilogue). */
> -/* { dg-final { scan-assembler-times "push\[ql\]" 2 { target { ! ia32 } } } } */
> -/* { dg-final { scan-assembler-times "push\[ql\]" 4 { target { ia32 && nonpic } } } } */
> -/* { dg-final { scan-assembler-times "push\[ql\]" 5 { target { ia32 && { ! nonpic } } } } } */
> -/* { dg-final { scan-assembler-not "pop" } } */
> + So search for a push/pop sequence for stack probe and 2 argument
> + pushes on ia32. There is no need to save and restore the PIC
> + register on ia32 for a noreturn function. */
> +/* { dg-final { scan-assembler-times "push\[ql\]" 1 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "push\[ql\]" 3 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "pop" 1 } } */
>
> --
> 2.43.0
>
On Mon, Jan 22, 2024 at 8:58 AM Jan Hubicka <hubicka@ucw.cz> wrote:
>
> > I compared GCC master branch bootstrap and test times on a slow machine
> > with 6.6 Linux kernels compiled with the original GCC 13 and the GCC 13
> > with the backported patch. The performance data isn't precise since the
> > measurements were done on different days with different GCC sources under
> > different 6.6 kernel versions.
> >
> > GCC master branch build time in seconds:
> >
> > before after improvement
> > 30043.75user 30013.16user 0%
> > 1274.85system 1243.72system 2.4%
> >
> > GCC master branch test time in seconds (new tests added):
> >
> > before after improvement
> > 216035.90user 216547.51user 0
> > 27365.51system 26658.54system 2.6%
>
> It is interesting - the system time difference comes from smaller
> binary? Is the difference any significant?
I think it comes from
In Linux kernel 6.7.0 on x86-64, do_exit is changed from
do_exit:
endbr64
call <do_exit+0x9>
push %r15
push %r14
push %r13
push %r12
mov %rdi,%r12
push %rbp
push %rbx
mov %gs:0x0,%rbx
sub $0x28,%rsp
mov %gs:0x28,%rax
mov %rax,0x20(%rsp)
xor %eax,%eax
call *0x0(%rip) # <do_exit+0x39>
test $0x2,%ah
je <do_exit+0x8d3>
to
do_exit:
endbr64
call <do_exit+0x9>
sub $0x28,%rsp
mov %rdi,%r12
mov %gs:0x28,%rax
mov %rax,0x20(%rsp)
xor %eax,%eax
mov %gs:0x0,%rbx
call *0x0(%rip) # <do_exit+0x2f>
test $0x2,%ah
je <do_exit+0x8c9>
do_exit is called by every process when it exits.
> >
> > gcc/
> >
> > PR target/38534
> > * config/i386/i386-options.cc (ix86_set_func_type): Don't
> > save and restore callee saved registers for a noreturn function
> > with nothrow or compiled with -fno-exceptions.
>
> In general this looks like good thing to do. I wonder if that is not
> something middle-end should understand for all targets.
> Also I wonder about asynchronous stack unwinding. If we want to unwind
> stack from interrupt then we may need some registers to be saved (like
> base pointer).
It is compatible with -fasynchronous-unwind-tables. From glibc test
debug/tst-longjmp_chk:
Starting program:
/export/build/gnu/tools-build/glibc-cet/build-x86_64-linux/debug/tst-longjmp_chk
--direct
warning: Unable to find libthread_db matching inferior's thread
library, thread debugging will not be available.
Program received signal SIGABRT, Aborted.
__pthread_kill_implementation (threadid=<optimized out>, signo=signo@entry=6,
no_tid=no_tid@entry=0) at pthread_kill.c:44
44 return INTERNAL_SYSCALL_ERROR_P (ret) ?
INTERNAL_SYSCALL_ERRNO (ret) : 0;
(gdb) bt
#0 __pthread_kill_implementation (threadid=<optimized out>,
signo=signo@entry=6, no_tid=no_tid@entry=0) at pthread_kill.c:44
#1 0x0000555555294a4b in __pthread_kill_internal (signo=6,
threadid=<optimized out>) at pthread_kill.c:78
#2 0x000055555523da1a in __GI_raise (sig=sig@entry=6)
at ../sysdeps/posix/raise.c:26
#3 0x00005555552248b3 in __GI_abort () at abort.c:79
#4 0x0000555555225a7e in __libc_message_impl (
fmt=fmt@entry=0x5555553b7171 "*** %s ***: terminated\n")
at ../sysdeps/posix/libc_fatal.c:132
#5 0x0000555555324517 in __GI___fortify_fail (msg=<optimized out>)
at fortify_fail.c:24
#6 0x0000555555323411 in ____longjmp_chk ()
at ../sysdeps/x86_64/__longjmp.S:57
#7 0x0000555555324d6d in __GI___longjmp_chk (
env=env@entry=0x55555555a200 <b>, val=val@entry=1)
at ../setjmp/longjmp.c:41
#8 0x0000555555556a00 in do_test () at tst-longjmp_chk.c:70
#9 0x0000555555557388 in support_test_main (argc=1431675392,
argv=0x7fffffffdd30, config=0x1, config@entry=0x7fffffffdbe0)
at support_test_main.c:413
#10 0x000055555555673f in main (argc=<optimized out>, argv=<optimized out>)
at ../support/test-driver.c:170
(gdb)
abort is a noreturn function:
extern void abort (void) __THROW __attribute__ ((__noreturn__));
Callee-saved registers aren't saved:
Dump of assembler code for function __GI_abort:
0x00005555552247de <+0>: endbr64
0x00005555552247e2 <+4>: sub $0xa8,%rsp
0x00005555552247e9 <+11>: lea 0x1d1540(%rip),%rbx #
0x5555553f5d30 <lock>
0x00005555552247f0 <+18>: mov %fs:0x28,%rax
0x00005555552247f9 <+27>: mov %rax,0x98(%rsp)
0x0000555555224801 <+35>: xor %eax,%eax
0x0000555555224803 <+37>: mov %fs:0x10,%rbp
0x000055555522480c <+46>: cmp %rbp,0x1d1525(%rip) #
0x5555553f5d38 <lock+8>
0x0000555555224813 <+53>: je 0x555555224833 <__GI_abort+85>
0x0000555555224815 <+55>: mov $0x1,%edx
0x000055555522481a <+60>: lock cmpxchg %edx,0x1d150e(%rip) #
0x5555553f5d30 <lock>
0x0000555555224822 <+68>: je 0x55555522482c <__GI_abort+78>
0x0000555555224824 <+70>: mov %rbx,%rdi
> Honza
> >
> > gcc/testsuite/
> >
> > PR target/38534
> > * gcc.target/i386/pr38534-1.c: New file.
> > * gcc.target/i386/pr38534-2.c: Likewise.
> > * gcc.target/i386/pr38534-3.c: Likewise.
> > * gcc.target/i386/pr38534-4.c: Likewise.
> > * gcc.target/i386/stack-check-17.c: Updated.
> > ---
> > gcc/config/i386/i386-options.cc | 16 ++++++++++--
> > gcc/testsuite/gcc.target/i386/pr38534-1.c | 26 +++++++++++++++++++
> > gcc/testsuite/gcc.target/i386/pr38534-2.c | 18 +++++++++++++
> > gcc/testsuite/gcc.target/i386/pr38534-3.c | 19 ++++++++++++++
> > gcc/testsuite/gcc.target/i386/pr38534-4.c | 18 +++++++++++++
> > .../gcc.target/i386/stack-check-17.c | 19 +++++---------
> > 6 files changed, 102 insertions(+), 14 deletions(-)
> > create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-1.c
> > create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-2.c
> > create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-3.c
> > create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-4.c
> >
> > diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
> > index 0cdea30599e..f965568947c 100644
> > --- a/gcc/config/i386/i386-options.cc
> > +++ b/gcc/config/i386/i386-options.cc
> > @@ -3371,9 +3371,21 @@ ix86_simd_clone_adjust (struct cgraph_node *node)
> > static void
> > ix86_set_func_type (tree fndecl)
> > {
> > + /* No need to save and restore callee-saved registers for a noreturn
> > + function with nothrow or compiled with -fno-exceptions.
> > +
> > + NB: Don't use TREE_THIS_VOLATILE to check if this is a noreturn
> > + function. The local-pure-const pass turns an interrupt function
> > + into a noreturn function by setting TREE_THIS_VOLATILE. Normally
> > + the local-pure-const pass is run after ix86_set_func_type is called.
> > + When the local-pure-const pass is enabled for LTO, the interrupt
> > + function is marked as noreturn in the IR output, which leads the
> > + incompatible attribute error in LTO1. */
> > bool has_no_callee_saved_registers
> > - = lookup_attribute ("no_callee_saved_registers",
> > - TYPE_ATTRIBUTES (TREE_TYPE (fndecl)));
> > + = (((TREE_NOTHROW (fndecl) || !flag_exceptions)
> > + && lookup_attribute ("noreturn", DECL_ATTRIBUTES (fndecl)))
> > + || lookup_attribute ("no_callee_saved_registers",
> > + TYPE_ATTRIBUTES (TREE_TYPE (fndecl))));
> >
> > if (cfun->machine->func_type == TYPE_UNKNOWN)
> > {
> > diff --git a/gcc/testsuite/gcc.target/i386/pr38534-1.c b/gcc/testsuite/gcc.target/i386/pr38534-1.c
> > new file mode 100644
> > index 00000000000..9297959e759
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr38534-1.c
> > @@ -0,0 +1,26 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move" } */
> > +
> > +#define ARRAY_SIZE 256
> > +
> > +extern int array[ARRAY_SIZE][ARRAY_SIZE][ARRAY_SIZE];
> > +extern int value (int, int, int)
> > +#ifndef __x86_64__
> > +__attribute__ ((regparm(3)))
> > +#endif
> > +;
> > +
> > +void
> > +__attribute__((noreturn))
> > +no_return_to_caller (void)
> > +{
> > + unsigned i, j, k;
> > + for (i = ARRAY_SIZE; i > 0; --i)
> > + for (j = ARRAY_SIZE; j > 0; --j)
> > + for (k = ARRAY_SIZE; k > 0; --k)
> > + array[i - 1][j - 1][k - 1] = value (i, j, k);
> > + while (1);
> > +}
> > +
> > +/* { dg-final { scan-assembler-not "push" } } */
> > +/* { dg-final { scan-assembler-not "pop" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr38534-2.c b/gcc/testsuite/gcc.target/i386/pr38534-2.c
> > new file mode 100644
> > index 00000000000..1fb01363273
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr38534-2.c
> > @@ -0,0 +1,18 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move" } */
> > +
> > +extern void bar (void) __attribute__ ((no_callee_saved_registers));
> > +extern void fn (void) __attribute__ ((noreturn));
> > +
> > +__attribute__ ((noreturn))
> > +void
> > +foo (void)
> > +{
> > + bar ();
> > + fn ();
> > +}
> > +
> > +/* { dg-final { scan-assembler-not "push" } } */
> > +/* { dg-final { scan-assembler-not "pop" } } */
> > +/* { dg-final { scan-assembler-not "jmp\[\\t \]+_?bar" } } */
> > +/* { dg-final { scan-assembler "call\[\\t \]+_?bar" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr38534-3.c b/gcc/testsuite/gcc.target/i386/pr38534-3.c
> > new file mode 100644
> > index 00000000000..87fc35f3fe9
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr38534-3.c
> > @@ -0,0 +1,19 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move" } */
> > +
> > +typedef void (*fn_t) (void) __attribute__ ((no_callee_saved_registers));
> > +extern fn_t bar;
> > +extern void fn (void) __attribute__ ((noreturn));
> > +
> > +__attribute__ ((noreturn))
> > +void
> > +foo (void)
> > +{
> > + bar ();
> > + fn ();
> > +}
> > +
> > +/* { dg-final { scan-assembler-not "push" } } */
> > +/* { dg-final { scan-assembler-not "pop" } } */
> > +/* { dg-final { scan-assembler-not "jmp" } } */
> > +/* { dg-final { scan-assembler "call\[\\t \]+" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr38534-4.c b/gcc/testsuite/gcc.target/i386/pr38534-4.c
> > new file mode 100644
> > index 00000000000..561ebeef194
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr38534-4.c
> > @@ -0,0 +1,18 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move" } */
> > +
> > +typedef void (*fn_t) (void) __attribute__ ((no_callee_saved_registers));
> > +extern void fn (void) __attribute__ ((noreturn));
> > +
> > +__attribute__ ((noreturn))
> > +void
> > +foo (fn_t bar)
> > +{
> > + bar ();
> > + fn ();
> > +}
> > +
> > +/* { dg-final { scan-assembler-not "push" } } */
> > +/* { dg-final { scan-assembler-not "pop" } } */
> > +/* { dg-final { scan-assembler-not "jmp" } } */
> > +/* { dg-final { scan-assembler "call\[\\t \]+" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/stack-check-17.c b/gcc/testsuite/gcc.target/i386/stack-check-17.c
> > index b3e41cb3d25..061484e1319 100644
> > --- a/gcc/testsuite/gcc.target/i386/stack-check-17.c
> > +++ b/gcc/testsuite/gcc.target/i386/stack-check-17.c
> > @@ -23,19 +23,14 @@ f3 (void)
> > /* Verify no explicit probes. */
> > /* { dg-final { scan-assembler-not "or\[ql\]" } } */
> >
> > -/* We also want to verify we did not use a push/pop sequence
> > - to probe *sp as the callee register saves are sufficient
> > - to probe *sp.
> > -
> > - y0/y1 are live across the call and thus must be allocated
> > +/* y0/y1 are live across the call and thus must be allocated
> > into either a stack slot or callee saved register. The former
> > would be rather dumb. So assume it does not happen.
> >
> > - So search for two/four pushes for the callee register saves/argument pushes
> > - (plus one for the PIC register if needed on ia32) and no pops (since the
> > - function has no reachable epilogue). */
> > -/* { dg-final { scan-assembler-times "push\[ql\]" 2 { target { ! ia32 } } } } */
> > -/* { dg-final { scan-assembler-times "push\[ql\]" 4 { target { ia32 && nonpic } } } } */
> > -/* { dg-final { scan-assembler-times "push\[ql\]" 5 { target { ia32 && { ! nonpic } } } } } */
> > -/* { dg-final { scan-assembler-not "pop" } } */
> > + So search for a push/pop sequence for stack probe and 2 argument
> > + pushes on ia32. There is no need to save and restore the PIC
> > + register on ia32 for a noreturn function. */
> > +/* { dg-final { scan-assembler-times "push\[ql\]" 1 { target { ! ia32 } } } } */
> > +/* { dg-final { scan-assembler-times "push\[ql\]" 3 { target ia32 } } } */
> > +/* { dg-final { scan-assembler-times "pop" 1 } } */
> >
> > --
> > 2.43.0
> >
@@ -3371,9 +3371,21 @@ ix86_simd_clone_adjust (struct cgraph_node *node)
static void
ix86_set_func_type (tree fndecl)
{
+ /* No need to save and restore callee-saved registers for a noreturn
+ function with nothrow or compiled with -fno-exceptions.
+
+ NB: Don't use TREE_THIS_VOLATILE to check if this is a noreturn
+ function. The local-pure-const pass turns an interrupt function
+ into a noreturn function by setting TREE_THIS_VOLATILE. Normally
+ the local-pure-const pass is run after ix86_set_func_type is called.
+ When the local-pure-const pass is enabled for LTO, the interrupt
+ function is marked as noreturn in the IR output, which leads to an
+ incompatible attribute error in LTO1. */
bool has_no_callee_saved_registers
- = lookup_attribute ("no_callee_saved_registers",
- TYPE_ATTRIBUTES (TREE_TYPE (fndecl)));
+ = (((TREE_NOTHROW (fndecl) || !flag_exceptions)
+ && lookup_attribute ("noreturn", DECL_ATTRIBUTES (fndecl)))
+ || lookup_attribute ("no_callee_saved_registers",
+ TYPE_ATTRIBUTES (TREE_TYPE (fndecl))));
if (cfun->machine->func_type == TYPE_UNKNOWN)
{
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move" } */
+
+#define ARRAY_SIZE 256
+
+extern int array[ARRAY_SIZE][ARRAY_SIZE][ARRAY_SIZE];
+extern int value (int, int, int)
+#ifndef __x86_64__
+__attribute__ ((regparm(3)))
+#endif
+;
+
+void
+__attribute__((noreturn))
+no_return_to_caller (void)
+{
+ unsigned i, j, k;
+ for (i = ARRAY_SIZE; i > 0; --i)
+ for (j = ARRAY_SIZE; j > 0; --j)
+ for (k = ARRAY_SIZE; k > 0; --k)
+ array[i - 1][j - 1][k - 1] = value (i, j, k);
+ while (1);
+}
+
+/* { dg-final { scan-assembler-not "push" } } */
+/* { dg-final { scan-assembler-not "pop" } } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move" } */
+
+extern void bar (void) __attribute__ ((no_callee_saved_registers));
+extern void fn (void) __attribute__ ((noreturn));
+
+__attribute__ ((noreturn))
+void
+foo (void)
+{
+ bar ();
+ fn ();
+}
+
+/* { dg-final { scan-assembler-not "push" } } */
+/* { dg-final { scan-assembler-not "pop" } } */
+/* { dg-final { scan-assembler-not "jmp\[\\t \]+_?bar" } } */
+/* { dg-final { scan-assembler "call\[\\t \]+_?bar" } } */
new file mode 100644
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move" } */
+
+typedef void (*fn_t) (void) __attribute__ ((no_callee_saved_registers));
+extern fn_t bar;
+extern void fn (void) __attribute__ ((noreturn));
+
+__attribute__ ((noreturn))
+void
+foo (void)
+{
+ bar ();
+ fn ();
+}
+
+/* { dg-final { scan-assembler-not "push" } } */
+/* { dg-final { scan-assembler-not "pop" } } */
+/* { dg-final { scan-assembler-not "jmp" } } */
+/* { dg-final { scan-assembler "call\[\\t \]+" } } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move" } */
+
+typedef void (*fn_t) (void) __attribute__ ((no_callee_saved_registers));
+extern void fn (void) __attribute__ ((noreturn));
+
+__attribute__ ((noreturn))
+void
+foo (fn_t bar)
+{
+ bar ();
+ fn ();
+}
+
+/* { dg-final { scan-assembler-not "push" } } */
+/* { dg-final { scan-assembler-not "pop" } } */
+/* { dg-final { scan-assembler-not "jmp" } } */
+/* { dg-final { scan-assembler "call\[\\t \]+" } } */
@@ -23,19 +23,14 @@ f3 (void)
/* Verify no explicit probes. */
/* { dg-final { scan-assembler-not "or\[ql\]" } } */
-/* We also want to verify we did not use a push/pop sequence
- to probe *sp as the callee register saves are sufficient
- to probe *sp.
-
- y0/y1 are live across the call and thus must be allocated
+/* y0/y1 are live across the call and thus must be allocated
into either a stack slot or callee saved register. The former
would be rather dumb. So assume it does not happen.
- So search for two/four pushes for the callee register saves/argument pushes
- (plus one for the PIC register if needed on ia32) and no pops (since the
- function has no reachable epilogue). */
-/* { dg-final { scan-assembler-times "push\[ql\]" 2 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "push\[ql\]" 4 { target { ia32 && nonpic } } } } */
-/* { dg-final { scan-assembler-times "push\[ql\]" 5 { target { ia32 && { ! nonpic } } } } } */
-/* { dg-final { scan-assembler-not "pop" } } */
+ So search for a push/pop sequence for stack probe and 2 argument
+ pushes on ia32. There is no need to save and restore the PIC
+ register on ia32 for a noreturn function. */
+/* { dg-final { scan-assembler-times "push\[ql\]" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "push\[ql\]" 3 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "pop" 1 } } */