Hi,
As PR99888 and its related show, the current support for
-fpatchable-function-entry on powerpc ELFv2 doesn't work
well with global entry existence. For example, with one
command line option -fpatchable-function-entry=3,2, it got
below w/o this patch:
.LPFE1:
nop
nop
.type foo, @function
foo:
nop
.LFB0:
.cfi_startproc
.LCF0:
0: addis 2,12,.TOC.-.LCF0@ha
addi 2,2,.TOC.-.LCF0@l
.localentry foo,.-foo
, the assembly is unexpected since the patched NOPs have
no effects when being entered from local entry.
This patch is to update the NOPs patched before and after
local entry, it looks like:
.type foo, @function
foo:
.LFB0:
.cfi_startproc
.LCF0:
0: addis 2,12,.TOC.-.LCF0@ha
addi 2,2,.TOC.-.LCF0@l
nop
nop
.localentry foo,.-foo
nop
Bootstrapped and regtested on powerpc64-linux-gnu P7 & P8,
and powerpc64le-linux-gnu P9 & P10.
v2: Update some comments, error message wordings, and test
cases as Segher's review comments.
v1: https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599461.html
Is it ok for trunk?
BR,
Kewen
-----
PR target/99888
PR target/105649
gcc/ChangeLog:
* config/rs6000/rs6000-internal.h
(rs6000_print_patchable_function_entry): New function declaration.
* config/rs6000/rs6000-logue.cc (rs6000_output_function_prologue):
Support patchable-function-entry by emitting NOPs before and after
local entry for the function that needs global entry.
* config/rs6000/rs6000.cc (rs6000_print_patchable_function_entry): Skip
the function that needs global entry till global entry has been
emitted.
* config/rs6000/rs6000.h (struct machine_function): New bool member
global_entry_emitted.
gcc/testsuite/ChangeLog:
* gcc.target/powerpc/pr99888-1.c: New test.
* gcc.target/powerpc/pr99888-2.c: New test.
* gcc.target/powerpc/pr99888-3.c: New test.
* gcc.target/powerpc/pr99888-4.c: New test.
* gcc.target/powerpc/pr99888-5.c: New test.
* gcc.target/powerpc/pr99888-6.c: New test.
* c-c++-common/patchable_function_entry-default.c: Adjust for
powerpc_elfv2 to avoid compilation error.
---
gcc/config/rs6000/rs6000-internal.h | 5 +++
gcc/config/rs6000/rs6000-logue.cc | 32 +++++++++++++
gcc/config/rs6000/rs6000.cc | 10 ++++-
gcc/config/rs6000/rs6000.h | 4 ++
.../patchable_function_entry-default.c | 1 +
gcc/testsuite/gcc.target/powerpc/pr99888-1.c | 45 +++++++++++++++++++
gcc/testsuite/gcc.target/powerpc/pr99888-2.c | 45 +++++++++++++++++++
gcc/testsuite/gcc.target/powerpc/pr99888-3.c | 12 +++++
gcc/testsuite/gcc.target/powerpc/pr99888-4.c | 13 ++++++
gcc/testsuite/gcc.target/powerpc/pr99888-5.c | 13 ++++++
gcc/testsuite/gcc.target/powerpc/pr99888-6.c | 14 ++++++
11 files changed, 192 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99888-1.c
create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99888-2.c
create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99888-3.c
create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99888-4.c
create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99888-5.c
create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99888-6.c
--
2.27.0
Hi!
On Fri, Aug 12, 2022 at 05:40:06PM +0800, Kewen.Lin wrote:
> This patch is to update the NOPs patched before and after
> local entry, it looks like:
As I said before, please don't say NOPs. I know some documentation
does. That docvumentation needs fixing. This is not an acronym or
similar: "nop" is short for "noop" or "no-op", meaning "no operation".
It also is a well-accepted term. It also is an assembler mnemonic, and
has to be written as all lower case there as well.
Just say "nops" please.
> --- a/gcc/testsuite/c-c++-common/patchable_function_entry-default.c
> +++ b/gcc/testsuite/c-c++-common/patchable_function_entry-default.c
> @@ -1,6 +1,7 @@
> /* { dg-do compile { target { ! { nvptx*-*-* visium-*-* } } } } */
> /* { dg-options "-O2 -fpatchable-function-entry=3,1" } */
> /* { dg-additional-options "-fno-pie" { target sparc*-*-* } } */
> +/* { dg-additional-options "-fpatchable-function-entry=3,2" { target powerpc_elfv2 } } */
Add a comment why this is needed? People looking at this testcase in
the future (including yourself!) will thank you.
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr99888-1.c
> @@ -0,0 +1,45 @@
> +/* { dg-require-effective-target powerpc_elfv2 } */
Does this not work on other PowerPC targets?
> +/* Verify no errors for different NOPs after local entry. */
(Add "on ELFv2" if you make the test run everywhere :-) )
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr99888-2.c
> @@ -0,0 +1,45 @@
> +/* { dg-require-effective-target powerpc_elfv2 } */
> +
> +/* Verify no errors for 2, 6 and 14 NOPs before local entry. */
Similar.
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr99888-3.c
> @@ -0,0 +1,12 @@
> +/* { dg-require-effective-target powerpc_elfv2 } */
> +/* { dg-options "-fpatchable-function-entry=1" } */
> +
> +/* Verify no errors, using command line option instead of function
> + attribute. */
> +
> +extern int a;
> +
> +int test (int b) {
> + return a + b;
> +}
And more.
Rest looks good, thanks!
Segher
@@ -182,10 +182,15 @@ extern tree rs6000_fold_builtin (tree fndecl ATTRIBUTE_UNUSED,
tree *args ATTRIBUTE_UNUSED,
bool ignore ATTRIBUTE_UNUSED);
+extern void rs6000_print_patchable_function_entry (FILE *,
+ unsigned HOST_WIDE_INT,
+ bool);
+
extern bool rs6000_passes_float;
extern bool rs6000_passes_long_double;
extern bool rs6000_passes_vector;
extern bool rs6000_returns_struct;
extern bool cpu_builtin_p;
+
#endif
@@ -4013,11 +4013,43 @@ rs6000_output_function_prologue (FILE *file)
fprintf (file, "\tadd 2,2,12\n");
}
+ unsigned short patch_area_size = crtl->patch_area_size;
+ unsigned short patch_area_entry = crtl->patch_area_entry;
+ /* Need to emit the patching area. */
+ if (patch_area_size > 0)
+ {
+ cfun->machine->global_entry_emitted = true;
+ /* As ELFv2 ABI shows, the allowable bytes between the global
+ and local entry points are 0, 4, 8, 16, 32 and 64 when
+ there is a local entry point. Considering there are two
+ non-prefixed instructions for global entry point prologue
+ (8 bytes), the count for patchable NOPs before local entry
+ point would be 2, 6 and 14. It's possible to support those
+ other counts of NOPs by not making a local entry point, but
+ we don't have clear user cases for them, so leave them
+ unsupported for now. */
+ if (patch_area_entry > 0)
+ {
+ if (patch_area_entry != 2
+ && patch_area_entry != 6
+ && patch_area_entry != 14)
+ error ("unsupported number of nops before function entry (%u)",
+ patch_area_entry);
+ rs6000_print_patchable_function_entry (file, patch_area_entry,
+ true);
+ patch_area_size -= patch_area_entry;
+ }
+ }
+
fputs ("\t.localentry\t", file);
assemble_name (file, name);
fputs (",.-", file);
assemble_name (file, name);
fputs ("\n", file);
+ /* Emit the NOPs after local entry. */
+ if (patch_area_size > 0)
+ rs6000_print_patchable_function_entry (file, patch_area_size,
+ patch_area_entry == 0);
}
else if (rs6000_pcrel_p ())
@@ -14898,8 +14898,14 @@ rs6000_print_patchable_function_entry (FILE *file,
if (!(TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
&& HAVE_GAS_SECTION_LINK_ORDER)
flags |= SECTION_LINK_ORDER;
- default_print_patchable_function_entry_1 (file, patch_area_size, record_p,
- flags);
+ bool global_entry_needed_p = rs6000_global_entry_point_prologue_needed_p ();
+ /* For a function which needs global entry point, we will emit the
+ patchable area before and after local entry point under the control of
+ cfun->machine->global_entry_emitted, see the handling in function
+ rs6000_output_function_prologue. */
+ if (!global_entry_needed_p || cfun->machine->global_entry_emitted)
+ default_print_patchable_function_entry_1 (file, patch_area_size, record_p,
+ flags);
}
enum rtx_code
@@ -2439,6 +2439,10 @@ typedef struct GTY(()) machine_function
bool lr_is_wrapped_separately;
bool toc_is_wrapped_separately;
bool mma_return_type_error;
+ /* Indicate global entry is emitted, only useful when the function requires
+ global entry. It helps to control the patchable area before and after
+ local entry. */
+ bool global_entry_emitted;
} machine_function;
#endif
@@ -1,6 +1,7 @@
/* { dg-do compile { target { ! { nvptx*-*-* visium-*-* } } } } */
/* { dg-options "-O2 -fpatchable-function-entry=3,1" } */
/* { dg-additional-options "-fno-pie" { target sparc*-*-* } } */
+/* { dg-additional-options "-fpatchable-function-entry=3,2" { target powerpc_elfv2 } } */
/* { dg-final { scan-assembler-times "nop|NOP|SWYM" 3 { target { ! { alpha*-*-* } } } } } */
/* { dg-final { scan-assembler-times "bis" 3 { target alpha*-*-* } } } */
new file mode 100644
@@ -0,0 +1,45 @@
+/* { dg-require-effective-target powerpc_elfv2 } */
+
+/* Verify no errors for different NOPs after local entry. */
+
+extern int a;
+
+__attribute__ ((noipa, patchable_function_entry (1, 0)))
+int test1 (int b) {
+ return a + b;
+}
+
+__attribute__ ((noipa, patchable_function_entry (2, 0)))
+int test2 (int b) {
+ return a + b;
+}
+
+__attribute__ ((noipa, patchable_function_entry (3, 0)))
+int test3 (int b) {
+ return a + b;
+}
+
+__attribute__ ((noipa, patchable_function_entry (4, 0)))
+int test4 (int b) {
+ return a + b;
+}
+
+__attribute__ ((noipa, patchable_function_entry (5, 0)))
+int test5 (int b) {
+ return a + b;
+}
+
+__attribute__ ((noipa, patchable_function_entry (6, 0)))
+int test6 (int b) {
+ return a + b;
+}
+
+__attribute__ ((noipa, patchable_function_entry (7, 0)))
+int test7 (int b) {
+ return a + b;
+}
+
+__attribute__ ((noipa, patchable_function_entry (8, 0)))
+int test8 (int b) {
+ return a + b;
+}
new file mode 100644
@@ -0,0 +1,45 @@
+/* { dg-require-effective-target powerpc_elfv2 } */
+
+/* Verify no errors for 2, 6 and 14 NOPs before local entry. */
+
+extern int a;
+
+__attribute__ ((noipa, patchable_function_entry (2, 2)))
+int test1 (int b) {
+ return a + b;
+}
+
+__attribute__ ((noipa, patchable_function_entry (4, 2)))
+int test2 (int b) {
+ return a + b;
+}
+
+__attribute__ ((noipa, patchable_function_entry (6, 6)))
+int test3 (int b) {
+ return a + b;
+}
+
+__attribute__ ((noipa, patchable_function_entry (8, 6)))
+int test4 (int b) {
+ return a + b;
+}
+
+__attribute__ ((noipa, patchable_function_entry (16, 6)))
+int test5 (int b) {
+ return a + b;
+}
+
+__attribute__ ((noipa, patchable_function_entry (14, 14)))
+int test6 (int b) {
+ return a + b;
+}
+
+__attribute__ ((noipa, patchable_function_entry (28, 14)))
+int test7 (int b) {
+ return a + b;
+}
+
+__attribute__ ((noipa, patchable_function_entry (64, 14)))
+int test8 (int b) {
+ return a + b;
+}
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-require-effective-target powerpc_elfv2 } */
+/* { dg-options "-fpatchable-function-entry=1" } */
+
+/* Verify no errors, using command line option instead of function
+ attribute. */
+
+extern int a;
+
+int test (int b) {
+ return a + b;
+}
+
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-require-effective-target powerpc_elfv2 } */
+/* There is no global entry point prologue with pcrel. */
+/* { dg-options "-mno-pcrel -fpatchable-function-entry=1,1" } */
+
+/* Verify one error emitted for unexpected 1 NOP before local
+ entry. */
+
+extern int a;
+
+int test (int b) {
+ return a + b;
+}
+/* { dg-error "unsupported number of nops before function entry \\(1\\)" "" { target *-*-* } .-1 } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-require-effective-target powerpc_elfv2 } */
+/* There is no global entry point prologue with pcrel. */
+/* { dg-options "-mno-pcrel -fpatchable-function-entry=7,3" } */
+
+/* Verify one error emitted for unexpected 3 NOP before local
+ entry. */
+
+extern int a;
+
+int test (int b) {
+ return a + b;
+}
+/* { dg-error "unsupported number of nops before function entry \\(3\\)" "" { target *-*-* } .-1 } */
new file mode 100644
@@ -0,0 +1,14 @@
+/* { dg-require-effective-target powerpc_elfv2 } */
+/* There is no global entry point prologue with pcrel. */
+/* { dg-options "-mno-pcrel" } */
+
+/* Verify one error emitted for unexpected 4 NOP before local
+ entry. */
+
+extern int a;
+
+__attribute__ ((patchable_function_entry (20, 4)))
+int test (int b) {
+ return a + b;
+}
+/* { dg-error "unsupported number of nops before function entry \\(4\\)" "" { target *-*-* } .-1 } */