RISC-V: Use tp as gp when no TLS is used.

Message ID 20230803115107.63736-1-lidie@eswincomputing.com
State Accepted
Headers
Series RISC-V: Use tp as gp when no TLS is used. |

Checks

Context Check Description
snail/binutils-gdb-check success Github commit url

Commit Message

Die Li Aug. 3, 2023, 11:51 a.m. UTC
  This patch extends the GP-relative addressing mode by adding a 4K address
range on top of GP+2k, using the TP register instead of GP for relative
addressing. This allows more data to benefit from the relative addressing
access mode. As TP register is used for this optimization, it checks for
the presence of TLS (Thread-Local Storage) sections to ensure that the
optimization is performed only when TP is available. Additionally, a linker
option, "--tp-as-gp", is introduced to control the enabling of this optimization.

Take the "tp_as_gp.s" from this patch as an example to illustrate its optimization effect:

Before this patch:
After assembling and processing with "--relax", we have:
00000000000100e8 <_start>:
   100e8:	67c5                	lui	a5,0x11
   100ea:	0f878793          	add	a5,a5,248 # 110f8 <global_array>
   100ee:	67c9                	lui	a5,0x12
   100f0:	6d878793          	add	a5,a5,1752 # 126d8 <global_array2>

With this patch:
After assembling and processing with "--relax --tp-as-gp", we have:
00000000000100e8 <_start>:
   100e8:	67c5                	lui	a5,0x11
   100ea:	0f878793          	add	a5,a5,248 # 110f8 <global_array>
   100ee:	de620793          	add	a5,tp,-538 # .*

Check the symbol table, we have:
     6: 00000000000118f4     0 NOTYPE  GLOBAL DEFAULT  ABS __global_pointer$

The addressable range of TP is increased by 4K beyond the addressable range of GP,
thus setting TP to GP+0x1000. So the access to global_array2, which was originally
represented as "lui a5,0x12" followed by "add a5,a5,1752 # 126d8", is optimized to
"add a5,tp,-538", which is feasible.

Signed-off-by: Die Li <lidie@eswincomputing.com>

ChangeLog:

        * bfd/elfnn-riscv.c (tpoff):
        (_bfd_riscv_relax_lui): Convert R_RISCV_HI20 to R_RISCV_TPREL_HI20, 
                                R_RISCV_LO12_I to R_RISCV_TPREL_LO12_I,
                                R_RISCV_LO12_S to R_RISCV_TPREL_LO12_S
                                when use tp as gp.
        * ld/ldlex.h (enum option_values): Add OPTION_RELAX_TP_AS_GP.
        * ld/ldmain.c (main): Add option state.
        * ld/ldmain.h (ENABLE_TP_AS_GP): New macro to control option.
        * ld/lexsup.c (parse_args): Add --tp-as-gp option.
        * ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp: Add test entry.
        * ld/testsuite/ld-riscv-elf/tp_as_gp.d: New test.
        * ld/testsuite/ld-riscv-elf/tp_as_gp.s: New test.

include/ChangeLog:

        * bfdlink.h (struct bfd_link_info): Clarify the option state.
        * elf/riscv.h (ENABLE_TP_AS_GP): New macro relative with option.
        (USED_TP_AS_GP): Likewise.
---
 bfd/elfnn-riscv.c                          | 52 +++++++++++++++++-----
 include/bfdlink.h                          |  6 +++
 include/elf/riscv.h                        |  6 +++
 ld/ldlex.h                                 |  1 +
 ld/ldmain.c                                |  1 +
 ld/ldmain.h                                |  2 +
 ld/lexsup.c                                |  5 +++
 ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp |  1 +
 ld/testsuite/ld-riscv-elf/tp_as_gp.d       | 14 ++++++
 ld/testsuite/ld-riscv-elf/tp_as_gp.s       | 33 ++++++++++++++
 10 files changed, 110 insertions(+), 11 deletions(-)
 create mode 100644 ld/testsuite/ld-riscv-elf/tp_as_gp.d
 create mode 100644 ld/testsuite/ld-riscv-elf/tp_as_gp.s
  

Comments

Nelson Chu Aug. 3, 2023, 11:58 p.m. UTC | #1
Hi,

I cannot see any about this relaxation in riscv psabi,
https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#linker-relaxation-types.
If we haven't discussed this in psabi, then you should have one there.  All
stuff that might affect not only GNU guys will need to be discussed in the
psabi first, since lots of LLVM guys are also there.

Thanks
Nelson

On Thu, Aug 3, 2023 at 7:51 PM Die Li <lidie@eswincomputing.com> wrote:

> This patch extends the GP-relative addressing mode by adding a 4K address
> range on top of GP+2k, using the TP register instead of GP for relative
> addressing. This allows more data to benefit from the relative addressing
> access mode. As TP register is used for this optimization, it checks for
> the presence of TLS (Thread-Local Storage) sections to ensure that the
> optimization is performed only when TP is available. Additionally, a linker
> option, "--tp-as-gp", is introduced to control the enabling of this
> optimization.
>
> Take the "tp_as_gp.s" from this patch as an example to illustrate its
> optimization effect:
>
> Before this patch:
> After assembling and processing with "--relax", we have:
> 00000000000100e8 <_start>:
>    100e8:       67c5                    lui     a5,0x11
>    100ea:       0f878793                add     a5,a5,248 # 110f8
> <global_array>
>    100ee:       67c9                    lui     a5,0x12
>    100f0:       6d878793                add     a5,a5,1752 # 126d8
> <global_array2>
>
> With this patch:
> After assembling and processing with "--relax --tp-as-gp", we have:
> 00000000000100e8 <_start>:
>    100e8:       67c5                    lui     a5,0x11
>    100ea:       0f878793                add     a5,a5,248 # 110f8
> <global_array>
>    100ee:       de620793                add     a5,tp,-538 # .*
>
> Check the symbol table, we have:
>      6: 00000000000118f4     0 NOTYPE  GLOBAL DEFAULT  ABS
> __global_pointer$
>
> The addressable range of TP is increased by 4K beyond the addressable
> range of GP,
> thus setting TP to GP+0x1000. So the access to global_array2, which was
> originally
> represented as "lui a5,0x12" followed by "add a5,a5,1752 # 126d8", is
> optimized to
> "add a5,tp,-538", which is feasible.
>
> Signed-off-by: Die Li <lidie@eswincomputing.com>
>
> ChangeLog:
>
>         * bfd/elfnn-riscv.c (tpoff):
>         (_bfd_riscv_relax_lui): Convert R_RISCV_HI20 to
> R_RISCV_TPREL_HI20,
>                                 R_RISCV_LO12_I to R_RISCV_TPREL_LO12_I,
>                                 R_RISCV_LO12_S to R_RISCV_TPREL_LO12_S
>                                 when use tp as gp.
>         * ld/ldlex.h (enum option_values): Add OPTION_RELAX_TP_AS_GP.
>         * ld/ldmain.c (main): Add option state.
>         * ld/ldmain.h (ENABLE_TP_AS_GP): New macro to control option.
>         * ld/lexsup.c (parse_args): Add --tp-as-gp option.
>         * ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp: Add test entry.
>         * ld/testsuite/ld-riscv-elf/tp_as_gp.d: New test.
>         * ld/testsuite/ld-riscv-elf/tp_as_gp.s: New test.
>
> include/ChangeLog:
>
>         * bfdlink.h (struct bfd_link_info): Clarify the option state.
>         * elf/riscv.h (ENABLE_TP_AS_GP): New macro relative with option.
>         (USED_TP_AS_GP): Likewise.
> ---
>  bfd/elfnn-riscv.c                          | 52 +++++++++++++++++-----
>  include/bfdlink.h                          |  6 +++
>  include/elf/riscv.h                        |  6 +++
>  ld/ldlex.h                                 |  1 +
>  ld/ldmain.c                                |  1 +
>  ld/ldmain.h                                |  2 +
>  ld/lexsup.c                                |  5 +++
>  ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp |  1 +
>  ld/testsuite/ld-riscv-elf/tp_as_gp.d       | 14 ++++++
>  ld/testsuite/ld-riscv-elf/tp_as_gp.s       | 33 ++++++++++++++
>  10 files changed, 110 insertions(+), 11 deletions(-)
>  create mode 100644 ld/testsuite/ld-riscv-elf/tp_as_gp.d
>  create mode 100644 ld/testsuite/ld-riscv-elf/tp_as_gp.s
>
> diff --git a/bfd/elfnn-riscv.c b/bfd/elfnn-riscv.c
> index 09aa7be225e..9d418521839 100644
> --- a/bfd/elfnn-riscv.c
> +++ b/bfd/elfnn-riscv.c
> @@ -1700,17 +1700,6 @@ dtpoff (struct bfd_link_info *info, bfd_vma address)
>    return address - elf_hash_table (info)->tls_sec->vma - DTP_OFFSET;
>  }
>
> -/* Return the relocation value for a static TLS tp-relative relocation.
> */
> -
> -static bfd_vma
> -tpoff (struct bfd_link_info *info, bfd_vma address)
> -{
> -  /* If tls_sec is NULL, we should have signalled an error already.  */
> -  if (elf_hash_table (info)->tls_sec == NULL)
> -    return 0;
> -  return address - elf_hash_table (info)->tls_sec->vma - TP_OFFSET;
> -}
> -
>  /* Return the global pointer's value, or 0 if it is not in use.  */
>
>  static bfd_vma
> @@ -1725,6 +1714,25 @@ riscv_global_pointer_value (struct bfd_link_info
> *info)
>    return h->u.def.value + sec_addr (h->u.def.section);
>  }
>
> +/* Return the relocation value for a static TLS tp-relative relocation.
> */
> +
> +static bfd_vma
> +tpoff (struct bfd_link_info *info, bfd_vma address)
> +{
> +  /* If tls_sec is NULL, we try to use tp as gp, otherwise signal an
> error.  */
> +  if (elf_hash_table (info)->tls_sec == NULL)
> +  {
> +    if (info->tp_as_gp & USED_TP_AS_GP)
> +    {
> +      bfd_vma gp = riscv_global_pointer_value (info);
> +      bfd_vma tp = gp + 0x1000;
> +      return address - tp;
> +    }
> +    return 0;
> +  }
> +  return address - elf_hash_table (info)->tls_sec->vma - TP_OFFSET;
> +}
> +
>  /* Emplace a static relocation.  */
>
>  static bfd_reloc_status_type
> @@ -4657,6 +4665,28 @@ _bfd_riscv_relax_lui (bfd *abfd,
>         }
>      }
>
> +  unsigned sym = ELFNN_R_SYM (rel->r_info);
> +  if ((link_info->tp_as_gp & ENABLE_TP_AS_GP)
> +      && (elf_hash_table (link_info)->tls_sec == NULL))
> +  {
> +    bfd_vma tp = gp + 0x1000;
> +    if (undefined_weak
> +        || VALID_ITYPE_IMM (symval)
> +        || (symval >= tp
> +           && VALID_ITYPE_IMM (symval - tp + max_alignment +
> reserve_size))
> +        || (symval < tp
> +           && VALID_ITYPE_IMM (symval - tp - max_alignment -
> reserve_size)))
> +    {
> +      link_info->tp_as_gp |= USED_TP_AS_GP;
> +      if (ELFNN_R_TYPE (rel->r_info) == R_RISCV_HI20)
> +        rel->r_info = ELFNN_R_INFO (sym, R_RISCV_TPREL_HI20);
> +      if (ELFNN_R_TYPE (rel->r_info) == R_RISCV_LO12_I)
> +        rel->r_info = ELFNN_R_INFO (sym, R_RISCV_TPREL_LO12_I);
> +      if (ELFNN_R_TYPE (rel->r_info) == R_RISCV_LO12_S)
> +        rel->r_info = ELFNN_R_INFO (sym, R_RISCV_TPREL_LO12_S);
> +    }
> +  }
> +
>    /* Can we relax LUI to C.LUI?  Alignment might move the section forward;
>       account for this assuming page alignment at worst. In the presence
> of
>       RELRO segment the linker aligns it by one page size, therefore
> sections
> diff --git a/include/bfdlink.h b/include/bfdlink.h
> index 840790a298c..bf79035341d 100644
> --- a/include/bfdlink.h
> +++ b/include/bfdlink.h
> @@ -557,6 +557,12 @@ struct bfd_link_info
>    /* TRUE if commonpagesize is set on command-line.  */
>    unsigned int commonpagesize_is_set : 1;
>
> +  /* Tri-state variable:
> +     0 => option --tp-as-gp is not specified by user.
> +     1 => option --tp-as-gp is specified by user, but not used.
> +     3 => option --tp-as-gp is specified by user, and has been used.  */
> +  unsigned int tp_as_gp: 2;
> +
>    /* Char that may appear as the first char of a symbol, but should be
>       skipped (like symbol_leading_char) when looking up symbols in
>       wrap_hash.  Used by PowerPC Linux for 'dot' symbols.  */
> diff --git a/include/elf/riscv.h b/include/elf/riscv.h
> index 0aa8b3359c4..5dfe7aee00d 100644
> --- a/include/elf/riscv.h
> +++ b/include/elf/riscv.h
> @@ -115,6 +115,12 @@ END_RELOC_NUMBERS (R_RISCV_max)
>  /* File uses the 32E base integer instruction.  */
>  #define EF_RISCV_RVE 0x0008
>
> +/* Enable the switch of option --tp-as-gp.  */
> +#define ENABLE_TP_AS_GP 0x1
> +
> +/* Use the switch of option --tp-as-gp.  */
> +#define USED_TP_AS_GP 0x2
> +
>  /* The name of the global pointer symbol.  */
>  #define RISCV_GP_SYMBOL "__global_pointer$"
>
> diff --git a/ld/ldlex.h b/ld/ldlex.h
> index 87cac02141d..ebd9e6282fb 100644
> --- a/ld/ldlex.h
> +++ b/ld/ldlex.h
> @@ -54,6 +54,7 @@ enum option_values
>    OPTION_OFORMAT,
>    OPTION_RELAX,
>    OPTION_NO_RELAX,
> +  OPTION_RELAX_TP_AS_GP,
>    OPTION_NO_SYMBOLIC,
>    OPTION_RETAIN_SYMBOLS_FILE,
>    OPTION_RPATH,
> diff --git a/ld/ldmain.c b/ld/ldmain.c
> index 06ac2c64fa8..d5c8fa62687 100644
> --- a/ld/ldmain.c
> +++ b/ld/ldmain.c
> @@ -339,6 +339,7 @@ main (int argc, char **argv)
>    link_info.combreloc = true;
>    link_info.strip_discarded = true;
>    link_info.prohibit_multiple_definition_absolute = false;
> +  link_info.tp_as_gp = 0;
>    link_info.textrel_check = DEFAULT_LD_TEXTREL_CHECK;
>    link_info.emit_hash = DEFAULT_EMIT_SYSV_HASH;
>    link_info.emit_gnu_hash = DEFAULT_EMIT_GNU_HASH;
> diff --git a/ld/ldmain.h b/ld/ldmain.h
> index dda124b96e8..f03ea59daea 100644
> --- a/ld/ldmain.h
> +++ b/ld/ldmain.h
> @@ -56,6 +56,8 @@ extern char *error_handling_script;
>    do { link_info.disable_target_specific_optimizations = 2; } while (0)
>  #define ENABLE_RELAXATION              \
>    do { link_info.disable_target_specific_optimizations = 0; } while (0)
> +#define ENABLE_TP_AS_GP                 \
> +  do { link_info.tp_as_gp = true; } while (0)
>
>  extern void add_ysym (const char *);
>  extern void add_wrap (const char *);
> diff --git a/ld/lexsup.c b/ld/lexsup.c
> index fe8722313fe..eaa126475ba 100644
> --- a/ld/lexsup.c
> +++ b/ld/lexsup.c
> @@ -457,6 +457,8 @@ static const struct ld_option ld_options[] =
>      '\0', NULL, N_("Reduce code size by using target specific
> optimizations"), TWO_DASHES },
>    { {"no-relax", no_argument, NULL, OPTION_NO_RELAX},
>      '\0', NULL, N_("Do not use relaxation techniques to reduce code
> size"), TWO_DASHES },
> +  { {"tp-as-gp", no_argument, NULL, OPTION_RELAX_TP_AS_GP},
> +    '\0', NULL, N_("Use tp as gp when there is no multithreaded scenario
> that requires the tp"), TWO_DASHES },
>    { {"retain-symbols-file", required_argument, NULL,
>       OPTION_RETAIN_SYMBOLS_FILE},
>      '\0', N_("FILE"), N_("Keep only symbols listed in FILE"), TWO_DASHES
> },
> @@ -1286,6 +1288,9 @@ parse_args (unsigned argc, char **argv)
>         case OPTION_RELAX:
>           ENABLE_RELAXATION;
>           break;
> +        case OPTION_RELAX_TP_AS_GP:
> +          ENABLE_TP_AS_GP;
> +          break;
>         case OPTION_RETAIN_SYMBOLS_FILE:
>           add_keepsyms_file (optarg);
>           break;
> diff --git a/ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp
> b/ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp
> index 947a266ba72..45a1d7e487e 100644
> --- a/ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp
> +++ b/ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp
> @@ -124,6 +124,7 @@ if [istarget "riscv*-*-*"] {
>      run_dump_test "pcgp-relax-01"
>      run_dump_test "pcgp-relax-01-norelaxgp"
>      run_dump_test "pcgp-relax-02"
> +    run_dump_test "tp_as_gp"
>      run_dump_test "c-lui"
>      run_dump_test "c-lui-2"
>      run_dump_test "disas-jalr"
> diff --git a/ld/testsuite/ld-riscv-elf/tp_as_gp.d
> b/ld/testsuite/ld-riscv-elf/tp_as_gp.d
> new file mode 100644
> index 00000000000..51b2e141e63
> --- /dev/null
> +++ b/ld/testsuite/ld-riscv-elf/tp_as_gp.d
> @@ -0,0 +1,14 @@
> +#source: tp_as_gp.s
> +#as:
> +#ld: --relax --tp-as-gp
> +#objdump: -d
> +
> +
> +.*:[   ]+file format .*
> +
> +Disassembly of section \.text:
> +
> +0+[0-9a-f]+ <_start>:
> +.*:[   ]+[0-9a-f]+[    ]+lui[  ]+a5,.*
> +.*:[   ]+[0-9a-f]+[    ]+add[  ]+a5,a5,[0-9]+ # [0-9a-f]+ <global_array>
> +.*:[   ]+[0-9a-f]+[    ]+add[  ]+a5,tp,\-[0-9]+ # [0-9a-f]+ .*
> \ No newline at end of file
> diff --git a/ld/testsuite/ld-riscv-elf/tp_as_gp.s
> b/ld/testsuite/ld-riscv-elf/tp_as_gp.s
> new file mode 100644
> index 00000000000..6d723b6a14b
> --- /dev/null
> +++ b/ld/testsuite/ld-riscv-elf/tp_as_gp.s
> @@ -0,0 +1,33 @@
> +       .option nopic
> +       .attribute arch,
> "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0"
> +       .attribute unaligned_access, 0
> +       .attribute stack_align, 16
> +       .text
> +
> +       .globl  global_array
> +       .bss
> +       .align  3
> +       .type   global_array, @object
> +       .size   global_array, 5600
> +global_array:
> +       .zero   5600
> +
> +       .globl  global_array2
> +       .bss
> +       .align  3
> +       .type   global_array3, @object
> +       .size   global_array3, 200
> +global_array2:
> +       .zero   200
> +
> +       .text
> +       .align  1
> +       .globl  _start
> +       .type   _start, @function
> +_start:
> +       lui     a5,%hi(global_array)
> +       addi    a5,a5,%lo(global_array)
> +
> +       lui     a5,%hi(global_array2)
> +       addi    a5,a5,%lo(global_array2)
> +
> --
> 2.17.1
>
>
  
Jeff Law Aug. 4, 2023, 5:28 a.m. UTC | #2
On 8/3/23 05:51, Die Li wrote:
> This patch extends the GP-relative addressing mode by adding a 4K address
> range on top of GP+2k, using the TP register instead of GP for relative
> addressing. This allows more data to benefit from the relative addressing
> access mode. As TP register is used for this optimization, it checks for
> the presence of TLS (Thread-Local Storage) sections to ensure that the
> optimization is performed only when TP is available. Additionally, a linker
> option, "--tp-as-gp", is introduced to control the enabling of this optimization.
> 
> Take the "tp_as_gp.s" from this patch as an example to illustrate its optimization effect:
> 
> Before this patch:
> After assembling and processing with "--relax", we have:
> 00000000000100e8 <_start>:
>     100e8:	67c5                	lui	a5,0x11
>     100ea:	0f878793          	add	a5,a5,248 # 110f8 <global_array>
>     100ee:	67c9                	lui	a5,0x12
>     100f0:	6d878793          	add	a5,a5,1752 # 126d8 <global_array2>
> 
> With this patch:
> After assembling and processing with "--relax --tp-as-gp", we have:
> 00000000000100e8 <_start>:
>     100e8:	67c5                	lui	a5,0x11
>     100ea:	0f878793          	add	a5,a5,248 # 110f8 <global_array>
>     100ee:	de620793          	add	a5,tp,-538 # .*
> 
> Check the symbol table, we have:
>       6: 00000000000118f4     0 NOTYPE  GLOBAL DEFAULT  ABS __global_pointer$
> 
> The addressable range of TP is increased by 4K beyond the addressable range of GP,
> thus setting TP to GP+0x1000. So the access to global_array2, which was originally
> represented as "lui a5,0x12" followed by "add a5,a5,1752 # 126d8", is optimized to
> "add a5,tp,-538", which is feasible.
> 
> Signed-off-by: Die Li <lidie@eswincomputing.com>
> 
> ChangeLog:
> 
>          * bfd/elfnn-riscv.c (tpoff):
>          (_bfd_riscv_relax_lui): Convert R_RISCV_HI20 to R_RISCV_TPREL_HI20,
>                                  R_RISCV_LO12_I to R_RISCV_TPREL_LO12_I,
>                                  R_RISCV_LO12_S to R_RISCV_TPREL_LO12_S
>                                  when use tp as gp.
>          * ld/ldlex.h (enum option_values): Add OPTION_RELAX_TP_AS_GP.
>          * ld/ldmain.c (main): Add option state.
>          * ld/ldmain.h (ENABLE_TP_AS_GP): New macro to control option.
>          * ld/lexsup.c (parse_args): Add --tp-as-gp option.
>          * ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp: Add test entry.
>          * ld/testsuite/ld-riscv-elf/tp_as_gp.d: New test.
>          * ld/testsuite/ld-riscv-elf/tp_as_gp.s: New test.
Is this actually safe?  Consider that the linked executable/DSO  may not 
have any TLS sections, but it may dlopen a DSO which does have TLS 
sections.   The linker doesn't have the visibility to know if this 
happens or not.

Furthermore, there's nothing that says a program couldn't have its own 
implementation that behaves like dlopen, but doesn't actually use dlopen 
(xorg IIRC used to do something like this).

ISTM this can only be done when the user has explicitly asked for this 
behavior.

jeff
  
Die Li Aug. 4, 2023, 7:35 a.m. UTC | #3
Hi,
On 2023-08-04 13:28,  Jeff Law wrote:
>
>
>
>On 8/3/23 05:51, Die Li wrote:
>> This patch extends the GP-relative addressing mode by adding a 4K address
>> range on top of GP+2k, using the TP register instead of GP for relative
>> addressing. This allows more data to benefit from the relative addressing
>> access mode. As TP register is used for this optimization, it checks for
>> the presence of TLS (Thread-Local Storage) sections to ensure that the
>> optimization is performed only when TP is available. Additionally, a linker
>> option, "--tp-as-gp", is introduced to control the enabling of this optimization.
>>
>> Take the "tp_as_gp.s" from this patch as an example to illustrate its optimization effect:
>>
>> Before this patch:
>> After assembling and processing with "--relax", we have:
>> 00000000000100e8 <_start>:
>>     100e8:	67c5                lui	a5,0x11
>>     100ea:	0f878793          add	a5,a5,248 # 110f8 <global_array>
>>     100ee:	67c9                lui	a5,0x12
>>     100f0:	6d878793          add	a5,a5,1752 # 126d8 <global_array2>
>>
>> With this patch:
>> After assembling and processing with "--relax --tp-as-gp", we have:
>> 00000000000100e8 <_start>:
>>     100e8:	67c5                lui	a5,0x11
>>     100ea:	0f878793          add	a5,a5,248 # 110f8 <global_array>
>>     100ee:	de620793          add	a5,tp,-538 # .*
>>
>> Check the symbol table, we have:
>>       6: 00000000000118f4     0 NOTYPE  GLOBAL DEFAULT  ABS __global_pointer$
>>
>> The addressable range of TP is increased by 4K beyond the addressable range of GP,
>> thus setting TP to GP+0x1000. So the access to global_array2, which was originally
>> represented as "lui a5,0x12" followed by "add a5,a5,1752 # 126d8", is optimized to
>> "add a5,tp,-538", which is feasible.
>>
>> Signed-off-by: Die Li <lidie@eswincomputing.com>
>>
>> ChangeLog:
>>
>>          * bfd/elfnn-riscv.c (tpoff):
>>          (_bfd_riscv_relax_lui): Convert R_RISCV_HI20 to R_RISCV_TPREL_HI20,
>>                                  R_RISCV_LO12_I to R_RISCV_TPREL_LO12_I,
>>                                  R_RISCV_LO12_S to R_RISCV_TPREL_LO12_S
>>                                  when use tp as gp.
>>          * ld/ldlex.h (enum option_values): Add OPTION_RELAX_TP_AS_GP.
>>          * ld/ldmain.c (main): Add option state.
>>          * ld/ldmain.h (ENABLE_TP_AS_GP): New macro to control option.
>>          * ld/lexsup.c (parse_args): Add --tp-as-gp option.
>>          * ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp: Add test entry.
>>          * ld/testsuite/ld-riscv-elf/tp_as_gp.d: New test.
>>          * ld/testsuite/ld-riscv-elf/tp_as_gp.s: New test.
>Is this actually safe?  Consider that the linked executable/DSO  may not
>have any TLS sections, but it may dlopen a DSO which does have TLS
>sections.   The linker doesn't have the visibility to know if this
>happens or not.
I noticed that there is a variable named "elf_hash_table (info)->tls_sec" in the linker, 
which indicates whether there is a TLS (Thread Local Storage) section. However, as 
you mentioned, if it is not sufficient, users need to explicitly state that there will be 
no TLS sections. This limitation can be too restrictive for users. I'm a beginner at 
linker. Thanks for your discussion.
>Furthermore, there's nothing that says a program couldn't have its own
>implementation that behaves like dlopen, but doesn't actually use dlopen
>(xorg IIRC used to do something like this).
>
>ISTM this can only be done when the user has explicitly asked for this
>behavior.
>
>jeff
  
Kito Cheng Aug. 4, 2023, 8:18 a.m. UTC | #4
TP as GP has discussed and appeared within the psABI group several times,
and I believe tp-as-gp is useful for some specific embedded applications which
didn't have thread at all, so my expectation is that is not expected work on
linux or any non bare-metal platform - which means we don't really worry
about DSO issue - because it not intended to support that :P

Actually EABI has put that into the list[1], but unfortunately we
don't have enough
resources to develop that, so from the psABI aspect we welcome anyone
who is interested to contribute :)

Back to the patch itself, I think we might need to add a new ELF attribute
for that, and check the compatibility at link time, also need to specify
how tp initialized in the psABI spec, it defined as assume tp = gp + 0x1000,
but I would suggest you could define some extra symbol like
__global_pointer_tp$ for initialized tp, that would be more flexible
and it might be possible to cover more symbol access if it's
not fixed into gp.

[1] https://github.com/riscv-non-isa/riscv-elf-psabi-doc/issues/305

On Fri, Aug 4, 2023 at 3:35 PM Die Li <lidie@eswincomputing.com> wrote:
>
> Hi,
> On 2023-08-04 13:28,  Jeff Law wrote:
> >
> >
> >
> >On 8/3/23 05:51, Die Li wrote:
> >> This patch extends the GP-relative addressing mode by adding a 4K address
> >> range on top of GP+2k, using the TP register instead of GP for relative
> >> addressing. This allows more data to benefit from the relative addressing
> >> access mode. As TP register is used for this optimization, it checks for
> >> the presence of TLS (Thread-Local Storage) sections to ensure that the
> >> optimization is performed only when TP is available. Additionally, a linker
> >> option, "--tp-as-gp", is introduced to control the enabling of this optimization.
> >>
> >> Take the "tp_as_gp.s" from this patch as an example to illustrate its optimization effect:
> >>
> >> Before this patch:
> >> After assembling and processing with "--relax", we have:
> >> 00000000000100e8 <_start>:
> >>     100e8:   67c5                lui a5,0x11
> >>     100ea:   0f878793          add   a5,a5,248 # 110f8 <global_array>
> >>     100ee:   67c9                lui a5,0x12
> >>     100f0:   6d878793          add   a5,a5,1752 # 126d8 <global_array2>
> >>
> >> With this patch:
> >> After assembling and processing with "--relax --tp-as-gp", we have:
> >> 00000000000100e8 <_start>:
> >>     100e8:   67c5                lui a5,0x11
> >>     100ea:   0f878793          add   a5,a5,248 # 110f8 <global_array>
> >>     100ee:   de620793          add   a5,tp,-538 # .*
> >>
> >> Check the symbol table, we have:
> >>       6: 00000000000118f4     0 NOTYPE  GLOBAL DEFAULT  ABS __global_pointer$
> >>
> >> The addressable range of TP is increased by 4K beyond the addressable range of GP,
> >> thus setting TP to GP+0x1000. So the access to global_array2, which was originally
> >> represented as "lui a5,0x12" followed by "add a5,a5,1752 # 126d8", is optimized to
> >> "add a5,tp,-538", which is feasible.
> >>
> >> Signed-off-by: Die Li <lidie@eswincomputing.com>
> >>
> >> ChangeLog:
> >>
> >>          * bfd/elfnn-riscv.c (tpoff):
> >>          (_bfd_riscv_relax_lui): Convert R_RISCV_HI20 to R_RISCV_TPREL_HI20,
> >>                                  R_RISCV_LO12_I to R_RISCV_TPREL_LO12_I,
> >>                                  R_RISCV_LO12_S to R_RISCV_TPREL_LO12_S
> >>                                  when use tp as gp.
> >>          * ld/ldlex.h (enum option_values): Add OPTION_RELAX_TP_AS_GP.
> >>          * ld/ldmain.c (main): Add option state.
> >>          * ld/ldmain.h (ENABLE_TP_AS_GP): New macro to control option.
> >>          * ld/lexsup.c (parse_args): Add --tp-as-gp option.
> >>          * ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp: Add test entry.
> >>          * ld/testsuite/ld-riscv-elf/tp_as_gp.d: New test.
> >>          * ld/testsuite/ld-riscv-elf/tp_as_gp.s: New test.
> >Is this actually safe?  Consider that the linked executable/DSO  may not
> >have any TLS sections, but it may dlopen a DSO which does have TLS
> >sections.   The linker doesn't have the visibility to know if this
> >happens or not.
> I noticed that there is a variable named "elf_hash_table (info)->tls_sec" in the linker,
> which indicates whether there is a TLS (Thread Local Storage) section. However, as
> you mentioned, if it is not sufficient, users need to explicitly state that there will be
> no TLS sections. This limitation can be too restrictive for users. I'm a beginner at
> linker. Thanks for your discussion.
> >Furthermore, there's nothing that says a program couldn't have its own
> >implementation that behaves like dlopen, but doesn't actually use dlopen
> >(xorg IIRC used to do something like this).
> >
> >ISTM this can only be done when the user has explicitly asked for this
> >behavior.
> >
> >jeff
  

Patch

diff --git a/bfd/elfnn-riscv.c b/bfd/elfnn-riscv.c
index 09aa7be225e..9d418521839 100644
--- a/bfd/elfnn-riscv.c
+++ b/bfd/elfnn-riscv.c
@@ -1700,17 +1700,6 @@  dtpoff (struct bfd_link_info *info, bfd_vma address)
   return address - elf_hash_table (info)->tls_sec->vma - DTP_OFFSET;
 }
 
-/* Return the relocation value for a static TLS tp-relative relocation.  */
-
-static bfd_vma
-tpoff (struct bfd_link_info *info, bfd_vma address)
-{
-  /* If tls_sec is NULL, we should have signalled an error already.  */
-  if (elf_hash_table (info)->tls_sec == NULL)
-    return 0;
-  return address - elf_hash_table (info)->tls_sec->vma - TP_OFFSET;
-}
-
 /* Return the global pointer's value, or 0 if it is not in use.  */
 
 static bfd_vma
@@ -1725,6 +1714,25 @@  riscv_global_pointer_value (struct bfd_link_info *info)
   return h->u.def.value + sec_addr (h->u.def.section);
 }
 
+/* Return the relocation value for a static TLS tp-relative relocation.  */
+
+static bfd_vma
+tpoff (struct bfd_link_info *info, bfd_vma address)
+{
+  /* If tls_sec is NULL, we try to use tp as gp, otherwise signal an error.  */
+  if (elf_hash_table (info)->tls_sec == NULL)
+  {
+    if (info->tp_as_gp & USED_TP_AS_GP)
+    {
+      bfd_vma gp = riscv_global_pointer_value (info);
+      bfd_vma tp = gp + 0x1000;
+      return address - tp;
+    }
+    return 0;
+  }
+  return address - elf_hash_table (info)->tls_sec->vma - TP_OFFSET;
+}
+
 /* Emplace a static relocation.  */
 
 static bfd_reloc_status_type
@@ -4657,6 +4665,28 @@  _bfd_riscv_relax_lui (bfd *abfd,
 	}
     }
 
+  unsigned sym = ELFNN_R_SYM (rel->r_info);
+  if ((link_info->tp_as_gp & ENABLE_TP_AS_GP)
+      && (elf_hash_table (link_info)->tls_sec == NULL))
+  {
+    bfd_vma tp = gp + 0x1000;
+    if (undefined_weak
+        || VALID_ITYPE_IMM (symval)
+        || (symval >= tp
+	    && VALID_ITYPE_IMM (symval - tp + max_alignment + reserve_size))
+        || (symval < tp
+	    && VALID_ITYPE_IMM (symval - tp - max_alignment - reserve_size)))
+    {
+      link_info->tp_as_gp |= USED_TP_AS_GP;
+      if (ELFNN_R_TYPE (rel->r_info) == R_RISCV_HI20)
+        rel->r_info = ELFNN_R_INFO (sym, R_RISCV_TPREL_HI20);
+      if (ELFNN_R_TYPE (rel->r_info) == R_RISCV_LO12_I)
+        rel->r_info = ELFNN_R_INFO (sym, R_RISCV_TPREL_LO12_I);
+      if (ELFNN_R_TYPE (rel->r_info) == R_RISCV_LO12_S)
+        rel->r_info = ELFNN_R_INFO (sym, R_RISCV_TPREL_LO12_S);
+    }
+  }
+
   /* Can we relax LUI to C.LUI?  Alignment might move the section forward;
      account for this assuming page alignment at worst. In the presence of 
      RELRO segment the linker aligns it by one page size, therefore sections
diff --git a/include/bfdlink.h b/include/bfdlink.h
index 840790a298c..bf79035341d 100644
--- a/include/bfdlink.h
+++ b/include/bfdlink.h
@@ -557,6 +557,12 @@  struct bfd_link_info
   /* TRUE if commonpagesize is set on command-line.  */
   unsigned int commonpagesize_is_set : 1;
 
+  /* Tri-state variable:
+     0 => option --tp-as-gp is not specified by user.
+     1 => option --tp-as-gp is specified by user, but not used.
+     3 => option --tp-as-gp is specified by user, and has been used.  */
+  unsigned int tp_as_gp: 2;
+
   /* Char that may appear as the first char of a symbol, but should be
      skipped (like symbol_leading_char) when looking up symbols in
      wrap_hash.  Used by PowerPC Linux for 'dot' symbols.  */
diff --git a/include/elf/riscv.h b/include/elf/riscv.h
index 0aa8b3359c4..5dfe7aee00d 100644
--- a/include/elf/riscv.h
+++ b/include/elf/riscv.h
@@ -115,6 +115,12 @@  END_RELOC_NUMBERS (R_RISCV_max)
 /* File uses the 32E base integer instruction.  */
 #define EF_RISCV_RVE 0x0008
 
+/* Enable the switch of option --tp-as-gp.  */
+#define ENABLE_TP_AS_GP 0x1
+
+/* Use the switch of option --tp-as-gp.  */
+#define USED_TP_AS_GP 0x2
+
 /* The name of the global pointer symbol.  */
 #define RISCV_GP_SYMBOL "__global_pointer$"
 
diff --git a/ld/ldlex.h b/ld/ldlex.h
index 87cac02141d..ebd9e6282fb 100644
--- a/ld/ldlex.h
+++ b/ld/ldlex.h
@@ -54,6 +54,7 @@  enum option_values
   OPTION_OFORMAT,
   OPTION_RELAX,
   OPTION_NO_RELAX,
+  OPTION_RELAX_TP_AS_GP,
   OPTION_NO_SYMBOLIC,
   OPTION_RETAIN_SYMBOLS_FILE,
   OPTION_RPATH,
diff --git a/ld/ldmain.c b/ld/ldmain.c
index 06ac2c64fa8..d5c8fa62687 100644
--- a/ld/ldmain.c
+++ b/ld/ldmain.c
@@ -339,6 +339,7 @@  main (int argc, char **argv)
   link_info.combreloc = true;
   link_info.strip_discarded = true;
   link_info.prohibit_multiple_definition_absolute = false;
+  link_info.tp_as_gp = 0;
   link_info.textrel_check = DEFAULT_LD_TEXTREL_CHECK;
   link_info.emit_hash = DEFAULT_EMIT_SYSV_HASH;
   link_info.emit_gnu_hash = DEFAULT_EMIT_GNU_HASH;
diff --git a/ld/ldmain.h b/ld/ldmain.h
index dda124b96e8..f03ea59daea 100644
--- a/ld/ldmain.h
+++ b/ld/ldmain.h
@@ -56,6 +56,8 @@  extern char *error_handling_script;
   do { link_info.disable_target_specific_optimizations = 2; } while (0)
 #define ENABLE_RELAXATION		\
   do { link_info.disable_target_specific_optimizations = 0; } while (0)
+#define ENABLE_TP_AS_GP                 \
+  do { link_info.tp_as_gp = true; } while (0)
 
 extern void add_ysym (const char *);
 extern void add_wrap (const char *);
diff --git a/ld/lexsup.c b/ld/lexsup.c
index fe8722313fe..eaa126475ba 100644
--- a/ld/lexsup.c
+++ b/ld/lexsup.c
@@ -457,6 +457,8 @@  static const struct ld_option ld_options[] =
     '\0', NULL, N_("Reduce code size by using target specific optimizations"), TWO_DASHES },
   { {"no-relax", no_argument, NULL, OPTION_NO_RELAX},
     '\0', NULL, N_("Do not use relaxation techniques to reduce code size"), TWO_DASHES },
+  { {"tp-as-gp", no_argument, NULL, OPTION_RELAX_TP_AS_GP},
+    '\0', NULL, N_("Use tp as gp when there is no multithreaded scenario that requires the tp"), TWO_DASHES },
   { {"retain-symbols-file", required_argument, NULL,
      OPTION_RETAIN_SYMBOLS_FILE},
     '\0', N_("FILE"), N_("Keep only symbols listed in FILE"), TWO_DASHES },
@@ -1286,6 +1288,9 @@  parse_args (unsigned argc, char **argv)
 	case OPTION_RELAX:
 	  ENABLE_RELAXATION;
 	  break;
+        case OPTION_RELAX_TP_AS_GP:
+          ENABLE_TP_AS_GP;
+          break;
 	case OPTION_RETAIN_SYMBOLS_FILE:
 	  add_keepsyms_file (optarg);
 	  break;
diff --git a/ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp b/ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp
index 947a266ba72..45a1d7e487e 100644
--- a/ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp
+++ b/ld/testsuite/ld-riscv-elf/ld-riscv-elf.exp
@@ -124,6 +124,7 @@  if [istarget "riscv*-*-*"] {
     run_dump_test "pcgp-relax-01"
     run_dump_test "pcgp-relax-01-norelaxgp"
     run_dump_test "pcgp-relax-02"
+    run_dump_test "tp_as_gp"
     run_dump_test "c-lui"
     run_dump_test "c-lui-2"
     run_dump_test "disas-jalr"
diff --git a/ld/testsuite/ld-riscv-elf/tp_as_gp.d b/ld/testsuite/ld-riscv-elf/tp_as_gp.d
new file mode 100644
index 00000000000..51b2e141e63
--- /dev/null
+++ b/ld/testsuite/ld-riscv-elf/tp_as_gp.d
@@ -0,0 +1,14 @@ 
+#source: tp_as_gp.s
+#as:
+#ld: --relax --tp-as-gp
+#objdump: -d
+
+
+.*:[ 	]+file format .*
+
+Disassembly of section \.text:
+
+0+[0-9a-f]+ <_start>:
+.*:[ 	]+[0-9a-f]+[ 	]+lui[ 	]+a5,.*
+.*:[ 	]+[0-9a-f]+[ 	]+add[ 	]+a5,a5,[0-9]+ # [0-9a-f]+ <global_array>
+.*:[ 	]+[0-9a-f]+[ 	]+add[ 	]+a5,tp,\-[0-9]+ # [0-9a-f]+ .*
\ No newline at end of file
diff --git a/ld/testsuite/ld-riscv-elf/tp_as_gp.s b/ld/testsuite/ld-riscv-elf/tp_as_gp.s
new file mode 100644
index 00000000000..6d723b6a14b
--- /dev/null
+++ b/ld/testsuite/ld-riscv-elf/tp_as_gp.s
@@ -0,0 +1,33 @@ 
+	.option nopic
+	.attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0"
+	.attribute unaligned_access, 0
+	.attribute stack_align, 16
+	.text
+
+	.globl	global_array
+	.bss
+	.align	3
+	.type	global_array, @object
+	.size	global_array, 5600
+global_array:
+	.zero	5600
+        
+	.globl	global_array2
+	.bss
+	.align	3
+	.type	global_array3, @object
+	.size	global_array3, 200
+global_array2:
+	.zero	200
+
+	.text
+	.align	1
+	.globl	_start
+	.type	_start, @function
+_start:
+	lui	a5,%hi(global_array)
+	addi	a5,a5,%lo(global_array)
+
+	lui	a5,%hi(global_array2)
+	addi	a5,a5,%lo(global_array2)
+