[RISC-V] Optimize GP-relative addressing for linker.
Checks
Commit Message
Consider the following test:
//test.c file
struct Person {
char name[20];
int age;
int nation;
int hobby[100];
};
int global_var;
int global_array[920];
struct Person person;
int main() {
global_var = 3;
global_array[365] = 16;
sprintf(person.name, "Lee");
person.age = 27;
person.nation = 77;
return 0;
}
Cflags:
-Xlinker --relax
Link relaxation can be turned on by the above option, and in fact
is turned on by default. After compiling, linking, and disassembling
the test files, there are the following results:
Before this patch:
Disassembly of section \.text:
0+[0-9a-f]+ <main>:
.*:[ ]+[0-9a-f]+[ ]+sw[ ]+[0-9a-f]+,gp,\-[0-9]+ # [0-9a-f]+ <global_var>
.*:[ ]+[0-9a-f]+[ ]+c\.lui[ ]+.*
.*:[ ]+[0-9a-f]+[ ]+addi[ ]+[0-9a-f]+,[0-9a-f]+,[0-9]+ # [0-9a-f]+ <global_array>
.*:[ ]+[0-9a-f]+[ ]+c\.lui[ ]+.*
.*:[ ]+[0-9a-f]+[ ]+addi[ ]+[0-9a-f]+,[0-9a-f]+,[0-9]+ # [0-9a-f]+ <person>
After this patch:
Disassembly of section \.text:
0+[0-9a-f]+ <main>:
.*:[ ]+[0-9a-f]+[ ]+sw[ ]+[0-9a-f]+,gp,\-[0-9]+ # [0-9a-f]+ <global_var>
.*:[ ]+[0-9a-f]+[ ]+addi[ ]+[0-9a-f]+,gp,\-[0-9]+ # [0-9a-f]+ <global_array>
.*:[ ]+[0-9a-f]+[ ]+addi[ ]+[0-9a-f]+,gp,\-[0-9]+ # [0-9a-f]+ <person>
After applying this patch, both array element and structure member
accesses have been optimized. In fact, for both array elements and
structure members, access is based on the data and the base address
of the structure, which is the address of the first member of the
array or structure. However, current linker takes into account the
size of arrays and structures when performing GP-relative addressing.
As a result, some array and structure bases within the GP-relative
addressing range cannot benefit from relaxation optimization. This
patch resolves this issue.
Signed-off-by: Die Li <lidie@eswincomputing.com>
ChangeLog:
* bfd/elfnn-riscv.c (_bfd_riscv_relax_section): Update reserve_size.
* ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp: New test entry.
* ld/testsuite/ld-riscv-elf/gp-relax.d: New test.
* ld/testsuite/ld-riscv-elf/gp-relax.s: New test.
---
bfd/elfnn-riscv.c | 7 +++++
ld/testsuite/ld-riscv-elf/gp-relax.d | 12 +++++++++
ld/testsuite/ld-riscv-elf/gp-relax.s | 31 ++++++++++++++++++++++
ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp | 1 +
4 files changed, 51 insertions(+)
create mode 100644 ld/testsuite/ld-riscv-elf/gp-relax.d
create mode 100644 ld/testsuite/ld-riscv-elf/gp-relax.s
Comments
Testcase in your git commit has a base offset, but the testcase in the
code only contains a base with no offset?
I am a little concerned about the case with an offset like "lui
a5,%hi(global_array+256) addi a5,a5,%lo(global_array+256)" is still
right for this optimization?
On Tue, Jun 27, 2023 at 8:47 PM Die Li <lidie@eswincomputing.com> wrote:
>
> Consider the following test:
>
> //test.c file
>
> struct Person {
> char name[20];
> int age;
> int nation;
> int hobby[100];
> };
>
> int global_var;
> int global_array[920];
> struct Person person;
>
> int main() {
> global_var = 3;
> global_array[365] = 16;
> sprintf(person.name, "Lee");
> person.age = 27;
> person.nation = 77;
> return 0;
> }
>
> Cflags:
> -Xlinker --relax
>
> Link relaxation can be turned on by the above option, and in fact
> is turned on by default. After compiling, linking, and disassembling
> the test files, there are the following results:
>
> Before this patch:
> Disassembly of section \.text:
> 0+[0-9a-f]+ <main>:
> .*:[ ]+[0-9a-f]+[ ]+sw[ ]+[0-9a-f]+,gp,\-[0-9]+ # [0-9a-f]+ <global_var>
> .*:[ ]+[0-9a-f]+[ ]+c\.lui[ ]+.*
> .*:[ ]+[0-9a-f]+[ ]+addi[ ]+[0-9a-f]+,[0-9a-f]+,[0-9]+ # [0-9a-f]+ <global_array>
> .*:[ ]+[0-9a-f]+[ ]+c\.lui[ ]+.*
> .*:[ ]+[0-9a-f]+[ ]+addi[ ]+[0-9a-f]+,[0-9a-f]+,[0-9]+ # [0-9a-f]+ <person>
>
> After this patch:
> Disassembly of section \.text:
> 0+[0-9a-f]+ <main>:
> .*:[ ]+[0-9a-f]+[ ]+sw[ ]+[0-9a-f]+,gp,\-[0-9]+ # [0-9a-f]+ <global_var>
> .*:[ ]+[0-9a-f]+[ ]+addi[ ]+[0-9a-f]+,gp,\-[0-9]+ # [0-9a-f]+ <global_array>
> .*:[ ]+[0-9a-f]+[ ]+addi[ ]+[0-9a-f]+,gp,\-[0-9]+ # [0-9a-f]+ <person>
>
> After applying this patch, both array element and structure member
> accesses have been optimized. In fact, for both array elements and
> structure members, access is based on the data and the base address
> of the structure, which is the address of the first member of the
> array or structure. However, current linker takes into account the
> size of arrays and structures when performing GP-relative addressing.
> As a result, some array and structure bases within the GP-relative
> addressing range cannot benefit from relaxation optimization. This
> patch resolves this issue.
>
> Signed-off-by: Die Li <lidie@eswincomputing.com>
>
> ChangeLog:
>
> * bfd/elfnn-riscv.c (_bfd_riscv_relax_section): Update reserve_size.
> * ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp: New test entry.
> * ld/testsuite/ld-riscv-elf/gp-relax.d: New test.
> * ld/testsuite/ld-riscv-elf/gp-relax.s: New test.
> ---
> bfd/elfnn-riscv.c | 7 +++++
> ld/testsuite/ld-riscv-elf/gp-relax.d | 12 +++++++++
> ld/testsuite/ld-riscv-elf/gp-relax.s | 31 ++++++++++++++++++++++
> ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp | 1 +
> 4 files changed, 51 insertions(+)
> create mode 100644 ld/testsuite/ld-riscv-elf/gp-relax.d
> create mode 100644 ld/testsuite/ld-riscv-elf/gp-relax.s
>
> diff --git a/bfd/elfnn-riscv.c b/bfd/elfnn-riscv.c
> index 09aa7be225e..29fc5484e0f 100644
> --- a/bfd/elfnn-riscv.c
> +++ b/bfd/elfnn-riscv.c
> @@ -5169,6 +5169,13 @@ _bfd_riscv_relax_section (bfd *abfd, asection *sec,
> if (h->type != STT_FUNC)
> reserve_size =
> (h->size - rel->r_addend) > h->size ? 0 : h->size - rel->r_addend;
> +
> + /* For global pointer relative addressing, it is sufficient to ensure
> + that the symbol's base address falls within the range of global
> + pointer relative addressing. */
> + if (h->type == STT_OBJECT)
> + reserve_size = 0;
> +
> symtype = h->type;
> }
>
> diff --git a/ld/testsuite/ld-riscv-elf/gp-relax.d b/ld/testsuite/ld-riscv-elf/gp-relax.d
> new file mode 100644
> index 00000000000..ec2f59b1b19
> --- /dev/null
> +++ b/ld/testsuite/ld-riscv-elf/gp-relax.d
> @@ -0,0 +1,12 @@
> +#source: gp-relax.s
> +#ld: --relax
> +#objdump: -d -Mno-aliases
> +
> +.*:[ ]+file format .*
> +
> +
> +Disassembly of section \.text:
> +
> +0+[0-9a-f]+ <_start>:
> +.*:[ ]+[0-9a-f]+[ ]+addi[ ]+a5,gp,\-[0-9]+ # [0-9a-f]+ <global_array>
> +.*:[ ]+[0-9a-f]+[ ]+addi[ ]+a4,gp,[0-9]+ # [0-9a-f]+ <person>
> diff --git a/ld/testsuite/ld-riscv-elf/gp-relax.s b/ld/testsuite/ld-riscv-elf/gp-relax.s
> new file mode 100644
> index 00000000000..05548888ebf
> --- /dev/null
> +++ b/ld/testsuite/ld-riscv-elf/gp-relax.s
> @@ -0,0 +1,31 @@
> + .text
> + .globl global_var
> + .section .sbss,"aw",@nobits
> + .align 2
> + .type global_var, @object
> + .size global_var, 4
> +global_var:
> + .zero 4
> + .globl global_array
> + .bss
> + .align 3
> + .type global_array, @object
> + .size global_array, 3680
> +global_array:
> + .zero 3680
> + .globl person
> + .align 3
> + .type person, @object
> + .size person, 428
> +person:
> + .zero 428
> + .text
> + .align 1
> + .globl _start
> + .type _start, @function
> +_start:
> + lui a5,%hi(global_array)
> + addi a5,a5,%lo(global_array)
> +
> + lui a4,%hi(person)
> + addi a4,a4,%lo(person)
> diff --git a/ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp b/ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp
> index 947a266ba72..6a04955b23b 100644
> --- a/ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp
> +++ b/ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp
> @@ -124,6 +124,7 @@ if [istarget "riscv*-*-*"] {
> run_dump_test "pcgp-relax-01"
> run_dump_test "pcgp-relax-01-norelaxgp"
> run_dump_test "pcgp-relax-02"
> + run_dump_test "gp-relax"
> run_dump_test "c-lui"
> run_dump_test "c-lui-2"
> run_dump_test "disas-jalr"
> --
> 2.17.1
>
On Tue, 27 Jun 2023 08:43:42 PDT (-0700), kito.cheng@sifive.com wrote:
> Testcase in your git commit has a base offset, but the testcase in the
> code only contains a base with no offset?
>
> I am a little concerned about the case with an offset like "lui
> a5,%hi(global_array+256) addi a5,a5,%lo(global_array+256)" is still
> right for this optimization?
Ya, I think there's probably a bug somewhere around there: maybe this,
or maybe reusing hi-parts, or something. It's pretty close to the
release, so let's hold off on this one until we have time -- Nelson and
I are still trying to figure out this PC-relative GP SHN_ABS stuff...
>
> On Tue, Jun 27, 2023 at 8:47 PM Die Li <lidie@eswincomputing.com> wrote:
>>
>> Consider the following test:
>>
>> //test.c file
>>
>> struct Person {
>> char name[20];
>> int age;
>> int nation;
>> int hobby[100];
>> };
>>
>> int global_var;
>> int global_array[920];
>> struct Person person;
>>
>> int main() {
>> global_var = 3;
>> global_array[365] = 16;
>> sprintf(person.name, "Lee");
>> person.age = 27;
>> person.nation = 77;
>> return 0;
>> }
>>
>> Cflags:
>> -Xlinker --relax
>>
>> Link relaxation can be turned on by the above option, and in fact
>> is turned on by default. After compiling, linking, and disassembling
>> the test files, there are the following results:
>>
>> Before this patch:
>> Disassembly of section \.text:
>> 0+[0-9a-f]+ <main>:
>> .*:[ ]+[0-9a-f]+[ ]+sw[ ]+[0-9a-f]+,gp,\-[0-9]+ # [0-9a-f]+ <global_var>
>> .*:[ ]+[0-9a-f]+[ ]+c\.lui[ ]+.*
>> .*:[ ]+[0-9a-f]+[ ]+addi[ ]+[0-9a-f]+,[0-9a-f]+,[0-9]+ # [0-9a-f]+ <global_array>
>> .*:[ ]+[0-9a-f]+[ ]+c\.lui[ ]+.*
>> .*:[ ]+[0-9a-f]+[ ]+addi[ ]+[0-9a-f]+,[0-9a-f]+,[0-9]+ # [0-9a-f]+ <person>
>>
>> After this patch:
>> Disassembly of section \.text:
>> 0+[0-9a-f]+ <main>:
>> .*:[ ]+[0-9a-f]+[ ]+sw[ ]+[0-9a-f]+,gp,\-[0-9]+ # [0-9a-f]+ <global_var>
>> .*:[ ]+[0-9a-f]+[ ]+addi[ ]+[0-9a-f]+,gp,\-[0-9]+ # [0-9a-f]+ <global_array>
>> .*:[ ]+[0-9a-f]+[ ]+addi[ ]+[0-9a-f]+,gp,\-[0-9]+ # [0-9a-f]+ <person>
>>
>> After applying this patch, both array element and structure member
>> accesses have been optimized. In fact, for both array elements and
>> structure members, access is based on the data and the base address
>> of the structure, which is the address of the first member of the
>> array or structure. However, current linker takes into account the
>> size of arrays and structures when performing GP-relative addressing.
>> As a result, some array and structure bases within the GP-relative
>> addressing range cannot benefit from relaxation optimization. This
>> patch resolves this issue.
>>
>> Signed-off-by: Die Li <lidie@eswincomputing.com>
>>
>> ChangeLog:
>>
>> * bfd/elfnn-riscv.c (_bfd_riscv_relax_section): Update reserve_size.
>> * ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp: New test entry.
>> * ld/testsuite/ld-riscv-elf/gp-relax.d: New test.
>> * ld/testsuite/ld-riscv-elf/gp-relax.s: New test.
>> ---
>> bfd/elfnn-riscv.c | 7 +++++
>> ld/testsuite/ld-riscv-elf/gp-relax.d | 12 +++++++++
>> ld/testsuite/ld-riscv-elf/gp-relax.s | 31 ++++++++++++++++++++++
>> ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp | 1 +
>> 4 files changed, 51 insertions(+)
>> create mode 100644 ld/testsuite/ld-riscv-elf/gp-relax.d
>> create mode 100644 ld/testsuite/ld-riscv-elf/gp-relax.s
>>
>> diff --git a/bfd/elfnn-riscv.c b/bfd/elfnn-riscv.c
>> index 09aa7be225e..29fc5484e0f 100644
>> --- a/bfd/elfnn-riscv.c
>> +++ b/bfd/elfnn-riscv.c
>> @@ -5169,6 +5169,13 @@ _bfd_riscv_relax_section (bfd *abfd, asection *sec,
>> if (h->type != STT_FUNC)
>> reserve_size =
>> (h->size - rel->r_addend) > h->size ? 0 : h->size - rel->r_addend;
>> +
>> + /* For global pointer relative addressing, it is sufficient to ensure
>> + that the symbol's base address falls within the range of global
>> + pointer relative addressing. */
>> + if (h->type == STT_OBJECT)
>> + reserve_size = 0;
>> +
>> symtype = h->type;
>> }
>>
>> diff --git a/ld/testsuite/ld-riscv-elf/gp-relax.d b/ld/testsuite/ld-riscv-elf/gp-relax.d
>> new file mode 100644
>> index 00000000000..ec2f59b1b19
>> --- /dev/null
>> +++ b/ld/testsuite/ld-riscv-elf/gp-relax.d
>> @@ -0,0 +1,12 @@
>> +#source: gp-relax.s
>> +#ld: --relax
>> +#objdump: -d -Mno-aliases
>> +
>> +.*:[ ]+file format .*
>> +
>> +
>> +Disassembly of section \.text:
>> +
>> +0+[0-9a-f]+ <_start>:
>> +.*:[ ]+[0-9a-f]+[ ]+addi[ ]+a5,gp,\-[0-9]+ # [0-9a-f]+ <global_array>
>> +.*:[ ]+[0-9a-f]+[ ]+addi[ ]+a4,gp,[0-9]+ # [0-9a-f]+ <person>
>> diff --git a/ld/testsuite/ld-riscv-elf/gp-relax.s b/ld/testsuite/ld-riscv-elf/gp-relax.s
>> new file mode 100644
>> index 00000000000..05548888ebf
>> --- /dev/null
>> +++ b/ld/testsuite/ld-riscv-elf/gp-relax.s
>> @@ -0,0 +1,31 @@
>> + .text
>> + .globl global_var
>> + .section .sbss,"aw",@nobits
>> + .align 2
>> + .type global_var, @object
>> + .size global_var, 4
>> +global_var:
>> + .zero 4
>> + .globl global_array
>> + .bss
>> + .align 3
>> + .type global_array, @object
>> + .size global_array, 3680
>> +global_array:
>> + .zero 3680
>> + .globl person
>> + .align 3
>> + .type person, @object
>> + .size person, 428
>> +person:
>> + .zero 428
>> + .text
>> + .align 1
>> + .globl _start
>> + .type _start, @function
>> +_start:
>> + lui a5,%hi(global_array)
>> + addi a5,a5,%lo(global_array)
>> +
>> + lui a4,%hi(person)
>> + addi a4,a4,%lo(person)
>> diff --git a/ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp b/ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp
>> index 947a266ba72..6a04955b23b 100644
>> --- a/ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp
>> +++ b/ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp
>> @@ -124,6 +124,7 @@ if [istarget "riscv*-*-*"] {
>> run_dump_test "pcgp-relax-01"
>> run_dump_test "pcgp-relax-01-norelaxgp"
>> run_dump_test "pcgp-relax-02"
>> + run_dump_test "gp-relax"
>> run_dump_test "c-lui"
>> run_dump_test "c-lui-2"
>> run_dump_test "disas-jalr"
>> --
>> 2.17.1
>>
Hi,
On 2023-06-27 23:43, Kito Cheng wrote:
>
>Testcase in your git commit has a base offset, but the testcase in the
>code only contains a base with no offset?
>
>I am a little concerned about the case with an offset like "lui
>a5,%hi(global_array+256) addi a5,a5,%lo(global_array+256)" is still
>right for this optimization?
In the _bfd_riscv_relax_section function of elfnn-riscv.c, the linker includes
the addend, which is the offset based on the symbol's original address, in
the symbol's address. Please refer to the following code:
```elfnn-riscv.c
if (sym_sec->sec_info_type == SEC_INFO_TYPE_MERGE
&& (sym_sec->flags & SEC_MERGE))
...
else
symval += rel->r_addend;
```
In this code, the linker performs a series of conditional checks for the section
to which the symbol belongs. If the section's type is MERGE and the section's
flags include SEC_MERGE, it indicates that the section is used for merging data.
Except for such cases, that is if the section is not been merge into segment, the
linker adds the base offset to the symbol's address.
Regarding the concern about the patch not breaking access to symbols with offsets,
the else statement "symval += rel->r_addend;" ensures that the addend is correctly
applied to the symbol's address, allowing access to symbols with offsets without any
issues.
there is a more detailed testcase to explain it:
//test.s file
.text
.globl global_var
.section .sbss,"aw",@nobits
.align 2
.type global_var, @object
.size global_var, 4
global_var:
.zero 4
.globl global_array
.bss
.align 3
.type global_array, @object
.size global_array, 3680
global_array:
.zero 3680
.globl person
.align 3
.type person, @object
.size person, 428
person:
.zero 428
.text
.align 1
.globl _start
.type _start, @function
_start:
lui a5,%hi(global_array)
addi a5,a5,%lo(global_array)
lui a3,%hi(global_array + 0x100)
addi a3,a3,%lo(global_array + 0x100)
lui a3,%hi(global_array + 0x1000)
addi a3,a3,%lo(global_array + 0x1000)
lui a4,%hi(person)
addi a4,a4,%lo(person)
In this example, I add two different offsets to the symbol `global_array`.
The first offset is relatively small, as shown in the instructions:
```
lui a3,%hi(global_array + 0x100)
addi a3,a3,%lo(global_array + 0x100)
```
In this case, the sum of the offset and the `global_array` address falls
within the range that can be reached by the global pointer (`gp`).
The second offset is much larger, as shown in the instructions:
```
lui a3,%hi(global_array + 0x1000)
addi a3,a3,%lo(global_array + 0x1000)
```
In this case, the offset is `0x1000`, which clearly exceeds the range that can be
reached by gp. Therefore, the address computation using `lui` and `addi`
instructions will not be possible within the GP-relative range.
After assembling, linking with the --relax flag and disassembling:
without this patch as reference, we have:
00000000000100e8 <_start>:
100e8: 000117b7 lui a5,0x11
100ec: 11078793 addi a5,a5,272 # 11110 <global_array>
100f0: 000116b7 lui a3,0x11
100f4: 21068693 addi a3,a3,528 # 11210 <global_array+0x100>
100f8: 000126b7 lui a3,0x12
100fc: 11068693 addi a3,a3,272 # 12110 <person+0x1a0>
10100: 00012737 lui a4,0x12
10104: f7070713 addi a4,a4,-144 # 11f70 <person>
with this patch, we have:
00000000000100e8 <_start>:
100e8: 80418793 addi a5,gp,-2044 # 11100 <global_array>
100ec: 90418693 addi a3,gp,-1788 # 11200 <global_array+0x100>
100f0: 000126b7 lui a3,0x12
100f4: 10068693 addi a3,a3,256 # 12100 <person+0x1a0>
100f8: 66418713 addi a4,gp,1636 # 11f60 <person>
As expected, providing a large offset to global_array does not result in relaxation.
However, when using a suitable offset that keeps global_array + offset within
the range that can be reached by the global pointer (gp), it can be relaxed during
the linking process.
>
>On Tue, Jun 27, 2023 at 8:47 PM Die Li <lidie@eswincomputing.com> wrote:
>>
>> Consider the following test:
>>
>> //test.c file
>>
>> struct Person {
>> char name[20];
>> int age;
>> int nation;
>> int hobby[100];
>> };
>>
>> int global_var;
>> int global_array[920];
>> struct Person person;
>>
>> int main() {
>> global_var = 3;
>> global_array[365] = 16;
>> sprintf(person.name, "Lee");
>> person.age = 27;
>> person.nation = 77;
>> return 0;
>> }
>>
>> Cflags:
>> -Xlinker --relax
>>
>> Link relaxation can be turned on by the above option, and in fact
>> is turned on by default. After compiling, linking, and disassembling
>> the test files, there are the following results:
>>
>> Before this patch:
>> Disassembly of section \.text:
>> 0+[0-9a-f]+ <main>:
>> .*:[ ]+[0-9a-f]+[ ]+sw[ ]+[0-9a-f]+,gp,\-[0-9]+ # [0-9a-f]+ <global_var>
>> .*:[ ]+[0-9a-f]+[ ]+c\.lui[ ]+.*
>> .*:[ ]+[0-9a-f]+[ ]+addi[ ]+[0-9a-f]+,[0-9a-f]+,[0-9]+ # [0-9a-f]+ <global_array>
>> .*:[ ]+[0-9a-f]+[ ]+c\.lui[ ]+.*
>> .*:[ ]+[0-9a-f]+[ ]+addi[ ]+[0-9a-f]+,[0-9a-f]+,[0-9]+ # [0-9a-f]+ <person>
>>
>> After this patch:
>> Disassembly of section \.text:
>> 0+[0-9a-f]+ <main>:
>> .*:[ ]+[0-9a-f]+[ ]+sw[ ]+[0-9a-f]+,gp,\-[0-9]+ # [0-9a-f]+ <global_var>
>> .*:[ ]+[0-9a-f]+[ ]+addi[ ]+[0-9a-f]+,gp,\-[0-9]+ # [0-9a-f]+ <global_array>
>> .*:[ ]+[0-9a-f]+[ ]+addi[ ]+[0-9a-f]+,gp,\-[0-9]+ # [0-9a-f]+ <person>
>>
>> After applying this patch, both array element and structure member
>> accesses have been optimized. In fact, for both array elements and
>> structure members, access is based on the data and the base address
>> of the structure, which is the address of the first member of the
>> array or structure. However, current linker takes into account the
>> size of arrays and structures when performing GP-relative addressing.
>> As a result, some array and structure bases within the GP-relative
>> addressing range cannot benefit from relaxation optimization. This
>> patch resolves this issue.
>>
>> Signed-off-by: Die Li <lidie@eswincomputing.com>
>>
>> ChangeLog:
>>
>> * bfd/elfnn-riscv.c (_bfd_riscv_relax_section): Update reserve_size.
>> * ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp: New test entry.
>> * ld/testsuite/ld-riscv-elf/gp-relax.d: New test.
>> * ld/testsuite/ld-riscv-elf/gp-relax.s: New test.
>> ---
>> bfd/elfnn-riscv.c | 7 +++++
>> ld/testsuite/ld-riscv-elf/gp-relax.d | 12 +++++++++
>> ld/testsuite/ld-riscv-elf/gp-relax.s | 31 ++++++++++++++++++++++
>> ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp | 1 +
>> 4 files changed, 51 insertions(+)
>> create mode 100644 ld/testsuite/ld-riscv-elf/gp-relax.d
>> create mode 100644 ld/testsuite/ld-riscv-elf/gp-relax.s
>>
>> diff --git a/bfd/elfnn-riscv.c b/bfd/elfnn-riscv.c
>> index 09aa7be225e..29fc5484e0f 100644
>> --- a/bfd/elfnn-riscv.c
>> +++ b/bfd/elfnn-riscv.c
>> @@ -5169,6 +5169,13 @@ _bfd_riscv_relax_section (bfd *abfd, asection *sec,
>> if (h->type != STT_FUNC)
>> reserve_size =
>> (h->size - rel->r_addend) > h->size ? 0 : h->size - rel->r_addend;
>> +
>> + /* For global pointer relative addressing, it is sufficient to ensure
>> + that the symbol's base address falls within the range of global
>> + pointer relative addressing. */
>> + if (h->type == STT_OBJECT)
>> + reserve_size = 0;
>> +
>> symtype = h->type;
>> }
>>
>> diff --git a/ld/testsuite/ld-riscv-elf/gp-relax.d b/ld/testsuite/ld-riscv-elf/gp-relax.d
>> new file mode 100644
>> index 00000000000..ec2f59b1b19
>> --- /dev/null
>> +++ b/ld/testsuite/ld-riscv-elf/gp-relax.d
>> @@ -0,0 +1,12 @@
>> +#source: gp-relax.s
>> +#ld: --relax
>> +#objdump: -d -Mno-aliases
>> +
>> +.*:[ ]+file format .*
>> +
>> +
>> +Disassembly of section \.text:
>> +
>> +0+[0-9a-f]+ <_start>:
>> +.*:[ ]+[0-9a-f]+[ ]+addi[ ]+a5,gp,\-[0-9]+ # [0-9a-f]+ <global_array>
>> +.*:[ ]+[0-9a-f]+[ ]+addi[ ]+a4,gp,[0-9]+ # [0-9a-f]+ <person>
>> diff --git a/ld/testsuite/ld-riscv-elf/gp-relax.s b/ld/testsuite/ld-riscv-elf/gp-relax.s
>> new file mode 100644
>> index 00000000000..05548888ebf
>> --- /dev/null
>> +++ b/ld/testsuite/ld-riscv-elf/gp-relax.s
>> @@ -0,0 +1,31 @@
>> + .text
>> + .globl global_var
>> + .section .sbss,"aw",@nobits
>> + .align 2
>> + .type global_var, @object
>> + .size global_var, 4
>> +global_var:
>> + .zero 4
>> + .globl global_array
>> + .bss
>> + .align 3
>> + .type global_array, @object
>> + .size global_array, 3680
>> +global_array:
>> + .zero 3680
>> + .globl person
>> + .align 3
>> + .type person, @object
>> + .size person, 428
>> +person:
>> + .zero 428
>> + .text
>> + .align 1
>> + .globl _start
>> + .type _start, @function
>> +_start:
>> + lui a5,%hi(global_array)
>> + addi a5,a5,%lo(global_array)
>> +
>> + lui a4,%hi(person)
>> + addi a4,a4,%lo(person)
>> diff --git a/ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp b/ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp
>> index 947a266ba72..6a04955b23b 100644
>> --- a/ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp
>> +++ b/ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp
>> @@ -124,6 +124,7 @@ if [istarget "riscv*-*-*"] {
>> run_dump_test "pcgp-relax-01"
>> run_dump_test "pcgp-relax-01-norelaxgp"
>> run_dump_test "pcgp-relax-02"
>> + run_dump_test "gp-relax"
>> run_dump_test "c-lui"
>> run_dump_test "c-lui-2"
>> run_dump_test "disas-jalr"
>> --
>> 2.17.1
>>
On 6/27/23 19:25, Palmer Dabbelt wrote:
> On Tue, 27 Jun 2023 08:43:42 PDT (-0700), kito.cheng@sifive.com wrote:
>> Testcase in your git commit has a base offset, but the testcase in the
>> code only contains a base with no offset?
>>
>> I am a little concerned about the case with an offset like "lui
>> a5,%hi(global_array+256) addi a5,a5,%lo(global_array+256)" is still
>> right for this optimization?
>
> Ya, I think there's probably a bug somewhere around there: maybe this,
> or maybe reusing hi-parts, or something. It's pretty close to the
> release, so let's hold off on this one until we have time -- Nelson and
> I are still trying to figure out this PC-relative GP SHN_ABS stuff...
My recollection from working in this space extensively on the PA is that
when the highpart and lowpart have the same offset, you're always OK.
When the offsets differ, you have to worry about carries. To deal with
that HP defined "rounding" mode relocations. Essentially they would
round the component to an 8k boundary.
Then in the compiler we had code to rewrite the address to expose common
bases to CSE:
> For the PA, transform:
>
> memory(X + <large int>)
>
> into:
>
> if (<large int> & mask) >= 16
> Y = (<large int> & ~mask) + mask + 1 Round up.
> else
> Y = (<large int> & ~mask) Round down.
> Z = X + Y
> memory (Z + (<large int> - Y));
>
> This is for CSE to find several similar references, and only use one Z.
>
> X can either be a SYMBOL_REF or REG, but because combine cannot
> perform a 4->2 combination we do nothing for SYMBOL_REF + D where
> D will not fit in 14 bits.
>
> MODE_FLOAT references allow displacements which fit in 5 bits, so use
> 0x1f as the mask.
>
> MODE_INT references allow displacements which fit in 14 bits, so use
> 0x3fff as the mask.
I don't offhand remember where "16" above came from (it's been 25+ years
since I wrote that code). I kept wanting to think it came from the
overlap between the bits set by the highpart instrution (21 bits) and
the lowpart (14 bits). But the math doesn't work and we do the same
transformation for FP modes where we just have 5 bits of offset for the
lowpart. Hmmm.
The HP linker also had the ability to eliminate unnecessary highpart
computations as well. Though that was quite dicey as you had to do
things like create bounds around jump tables which would inhibit the
linker optimizer (jump tables were pure code and relied on each entry in
the table being precisely 2 instructions).
This was all for static binaries. PIC hadn't really taken hold at this
point in the 90s. Many of the concepts are probably still useful in the
embedded world where static linking is still a common thing.
jeff
@@ -5169,6 +5169,13 @@ _bfd_riscv_relax_section (bfd *abfd, asection *sec,
if (h->type != STT_FUNC)
reserve_size =
(h->size - rel->r_addend) > h->size ? 0 : h->size - rel->r_addend;
+
+ /* For global pointer relative addressing, it is sufficient to ensure
+ that the symbol's base address falls within the range of global
+ pointer relative addressing. */
+ if (h->type == STT_OBJECT)
+ reserve_size = 0;
+
symtype = h->type;
}
new file mode 100644
@@ -0,0 +1,12 @@
+#source: gp-relax.s
+#ld: --relax
+#objdump: -d -Mno-aliases
+
+.*:[ ]+file format .*
+
+
+Disassembly of section \.text:
+
+0+[0-9a-f]+ <_start>:
+.*:[ ]+[0-9a-f]+[ ]+addi[ ]+a5,gp,\-[0-9]+ # [0-9a-f]+ <global_array>
+.*:[ ]+[0-9a-f]+[ ]+addi[ ]+a4,gp,[0-9]+ # [0-9a-f]+ <person>
new file mode 100644
@@ -0,0 +1,31 @@
+ .text
+ .globl global_var
+ .section .sbss,"aw",@nobits
+ .align 2
+ .type global_var, @object
+ .size global_var, 4
+global_var:
+ .zero 4
+ .globl global_array
+ .bss
+ .align 3
+ .type global_array, @object
+ .size global_array, 3680
+global_array:
+ .zero 3680
+ .globl person
+ .align 3
+ .type person, @object
+ .size person, 428
+person:
+ .zero 428
+ .text
+ .align 1
+ .globl _start
+ .type _start, @function
+_start:
+ lui a5,%hi(global_array)
+ addi a5,a5,%lo(global_array)
+
+ lui a4,%hi(person)
+ addi a4,a4,%lo(person)
@@ -124,6 +124,7 @@ if [istarget "riscv*-*-*"] {
run_dump_test "pcgp-relax-01"
run_dump_test "pcgp-relax-01-norelaxgp"
run_dump_test "pcgp-relax-02"
+ run_dump_test "gp-relax"
run_dump_test "c-lui"
run_dump_test "c-lui-2"
run_dump_test "disas-jalr"