[5/7] modpost: detect section mismatch for R_ARM_THM_{MOVW_ABS_NC,MOVT_ABS}

Message ID 20230601121001.1071533-6-masahiroy@kernel.org
State New
Headers
Series modpost: fix section mismatch detection for ARM |

Commit Message

Masahiro Yamada June 1, 2023, 12:09 p.m. UTC
  When CONFIG_THUMB2_KERNEL is enabled, modpost fails to detect some
types of section mismatches.

  [test code]

    #include <linux/init.h>

    int __initdata foo;
    int get_foo(void) { return foo; }

It is clearly a bad reference, but modpost does not report anything.

The test code above produces the following relocations.

  Relocation section '.rel.text' at offset 0x1e8 contains 2 entries:
   Offset     Info    Type            Sym.Value  Sym. Name
  00000000  0000052f R_ARM_THM_MOVW_AB 00000000   .LANCHOR0
  00000004  00000530 R_ARM_THM_MOVT_AB 00000000   .LANCHOR0

Currently, R_ARM_THM_MOVW_ABS_NC and R_ARM_THM_MOVT_ABS are just skipped.

Add code to handle them. I checked arch/arm/kernel/module.c to learn
how the offset is encoded in the instruction.

One more thing to note for Thumb instructions: the st_value of a symbol
that addresses a Thumb instruction has bit 0 set (i.e. it is odd), so
bit 0 must be masked off to get the offset. Otherwise, the nearest
symbol look-up will be off by one.

It is documented in "ELF for the ARM Architecture" [1]:

  * If the symbol addresses a Thumb instruction, its value is the address
    of the instruction with bit zero set (in a relocatable object, the
    section offset with bit zero set).

  * For the purposes of relocation the value used shall be the address
    of the instruction (st_value & ~1).

[1]: https://github.com/ARM-software/abi-aa/blob/main/aaelf32/aaelf32.rst

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---

 scripts/mod/modpost.c | 31 ++++++++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 5 deletions(-)
  

Comments

Ard Biesheuvel June 1, 2023, 12:23 p.m. UTC | #1
On Thu, 1 Jun 2023 at 14:10, Masahiro Yamada <masahiroy@kernel.org> wrote:
>
> When CONFIG_THUMB2_KERNEL is enabled, modpost fails to detect some
> types of section mismatches.
>
>   [test code]
>
>     #include <linux/init.h>
>
>     int __initdata foo;
>     int get_foo(void) { return foo; }
>
> It is apparently a bad reference, but modpost does not report anything.
>
> The test code above produces the following relocations.
>
>   Relocation section '.rel.text' at offset 0x1e8 contains 2 entries:
>    Offset     Info    Type            Sym.Value  Sym. Name
>   00000000  0000052f R_ARM_THM_MOVW_AB 00000000   .LANCHOR0
>   00000004  00000530 R_ARM_THM_MOVT_AB 00000000   .LANCHOR0
>
> Currently, R_ARM_THM_MOVW_ABS_NC and R_ARM_THM_MOVT_ABS are just skipped.
>
> Add code to handle them. I checked arch/arm/kernel/module.c to learn
> how the offset is encoded in the instruction.
>
> One more thing to note for Thumb instructions - the st_value is an odd
> value, so you need to mask the bit 0 to get the offset. Otherwise, you
> will get an off-by-one error in the nearest symbol look-up.
>
> It is documented in "ELF for the ARM Architecture" [1]:
>
>   * If the symbol addresses a Thumb instruction, its value is the address
>     of the instruction with bit zero set (in a relocatable object, the
>     section offset with bit zero set).
>
>   * For the purposes of relocation the value used shall be the address
>     of the instruction (st_value & ~1).
>
> [1]: https://github.com/ARM-software/abi-aa/blob/main/aaelf32/aaelf32.rst
>
> Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
> ---
>
>  scripts/mod/modpost.c | 31 ++++++++++++++++++++++++++-----
>  1 file changed, 26 insertions(+), 5 deletions(-)
>
> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
> index 32d56efe3f3b..528aa9175e84 100644
> --- a/scripts/mod/modpost.c
> +++ b/scripts/mod/modpost.c
> @@ -1082,7 +1082,8 @@ static Elf_Sym *find_nearest_sym(struct elf_info *elf, Elf_Addr addr,
>  {
>         Elf_Sym *sym;
>         Elf_Sym *near = NULL;
> -       Elf_Addr distance;
> +       Elf_Addr sym_addr, distance;
> +       bool is_arm = (elf->hdr->e_machine == EM_ARM);
>
>         for (sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
>                 if (get_secindex(elf, sym) != secndx)
> @@ -1090,10 +1091,19 @@ static Elf_Sym *find_nearest_sym(struct elf_info *elf, Elf_Addr addr,
>                 if (!is_valid_name(elf, sym))
>                         continue;
>
> -               if (addr >= sym->st_value)
> -                       distance = addr - sym->st_value;
> +               sym_addr = sym->st_value;
> +
> +               /*
> +                * For ARM Thumb instruction, the bit 0 of st_value is set.
> +                * Mask it to get the address.
> +                */
> +               if (is_arm)
> +                        sym_addr &= ~1;
> +

This is only appropriate for STT_FUNC symbols. If this is a data
reference, bit 0 could be a valid address bit.



> +               if (addr >= sym_addr)
> +                       distance = addr - sym_addr;
>                 else if (allow_negative)
> -                       distance = sym->st_value - addr;
> +                       distance = sym_addr - addr;
>                 else
>                         continue;
>
> @@ -1266,7 +1276,7 @@ static int addend_arm_rel(struct elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r)
>         unsigned int r_typ = ELF_R_TYPE(r->r_info);
>         Elf_Sym *sym = elf->symtab_start + ELF_R_SYM(r->r_info);
>         void *loc = reloc_location(elf, sechdr, r);
> -       uint32_t inst;
> +       uint32_t inst, upper, lower;
>         int32_t offset;
>
>         switch (r_typ) {
> @@ -1288,6 +1298,17 @@ static int addend_arm_rel(struct elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r)
>                 offset = sign_extend32((inst & 0x00ffffff) << 2, 25);
>                 r->r_addend = offset + sym->st_value + 8;
>                 break;
> +       case R_ARM_THM_MOVW_ABS_NC:
> +       case R_ARM_THM_MOVT_ABS:
> +               upper = TO_NATIVE(*(uint16_t *)loc);
> +               lower = TO_NATIVE(*((uint16_t *)loc + 1));
> +               offset = sign_extend32(((upper & 0x000f) << 12) |
> +                                      ((upper & 0x0400) << 1) |
> +                                      ((lower & 0x7000) >> 4) |
> +                                      (lower & 0x00ff),
> +                                      15);
> +               r->r_addend = offset + sym->st_value;
> +               break;
>         case R_ARM_THM_CALL:
>         case R_ARM_THM_JUMP24:
>         case R_ARM_THM_JUMP19:
> --
> 2.39.2
>
  
Masahiro Yamada June 1, 2023, 2:28 p.m. UTC | #2
On Thu, Jun 1, 2023 at 9:23 PM Ard Biesheuvel <ardb@kernel.org> wrote:
>
> On Thu, 1 Jun 2023 at 14:10, Masahiro Yamada <masahiroy@kernel.org> wrote:
> >
> > When CONFIG_THUMB2_KERNEL is enabled, modpost fails to detect some
> > types of section mismatches.
> >
> >   [test code]
> >
> >     #include <linux/init.h>
> >
> >     int __initdata foo;
> >     int get_foo(void) { return foo; }
> >
> > It is apparently a bad reference, but modpost does not report anything.
> >
> > The test code above produces the following relocations.
> >
> >   Relocation section '.rel.text' at offset 0x1e8 contains 2 entries:
> >    Offset     Info    Type            Sym.Value  Sym. Name
> >   00000000  0000052f R_ARM_THM_MOVW_AB 00000000   .LANCHOR0
> >   00000004  00000530 R_ARM_THM_MOVT_AB 00000000   .LANCHOR0
> >
> > Currently, R_ARM_THM_MOVW_ABS_NC and R_ARM_THM_MOVT_ABS are just skipped.
> >
> > Add code to handle them. I checked arch/arm/kernel/module.c to learn
> > how the offset is encoded in the instruction.
> >
> > One more thing to note for Thumb instructions - the st_value is an odd
> > value, so you need to mask the bit 0 to get the offset. Otherwise, you
> > will get an off-by-one error in the nearest symbol look-up.
> >
> > It is documented in "ELF for the ARM Architecture" [1]:
> >
> >   * If the symbol addresses a Thumb instruction, its value is the address
> >     of the instruction with bit zero set (in a relocatable object, the
> >     section offset with bit zero set).
> >
> >   * For the purposes of relocation the value used shall be the address
> >     of the instruction (st_value & ~1).
> >
> > [1]: https://github.com/ARM-software/abi-aa/blob/main/aaelf32/aaelf32.rst
> >
> > Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
> > ---
> >
> >  scripts/mod/modpost.c | 31 ++++++++++++++++++++++++++-----
> >  1 file changed, 26 insertions(+), 5 deletions(-)
> >
> > diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
> > index 32d56efe3f3b..528aa9175e84 100644
> > --- a/scripts/mod/modpost.c
> > +++ b/scripts/mod/modpost.c
> > @@ -1082,7 +1082,8 @@ static Elf_Sym *find_nearest_sym(struct elf_info *elf, Elf_Addr addr,
> >  {
> >         Elf_Sym *sym;
> >         Elf_Sym *near = NULL;
> > -       Elf_Addr distance;
> > +       Elf_Addr sym_addr, distance;
> > +       bool is_arm = (elf->hdr->e_machine == EM_ARM);
> >
> >         for (sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
> >                 if (get_secindex(elf, sym) != secndx)
> > @@ -1090,10 +1091,19 @@ static Elf_Sym *find_nearest_sym(struct elf_info *elf, Elf_Addr addr,
> >                 if (!is_valid_name(elf, sym))
> >                         continue;
> >
> > -               if (addr >= sym->st_value)
> > -                       distance = addr - sym->st_value;
> > +               sym_addr = sym->st_value;
> > +
> > +               /*
> > +                * For ARM Thumb instruction, the bit 0 of st_value is set.
> > +                * Mask it to get the address.
> > +                */
> > +               if (is_arm)
> > +                        sym_addr &= ~1;
> > +
>
> This is only appropriate for STT_FUNC symbols. If this is a data
> reference, bit 0 could be a valid address bit.


Thanks for catching it.

I will fix it as follows:

    /*
     * For ARM Thumb instruction, the bit 0 of st_value is set if
     * the symbol is STT_FUNC type. Mask it to get the address.
     */
    if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
            sym_addr &= ~1;
  

Patch

diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 32d56efe3f3b..528aa9175e84 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1082,7 +1082,8 @@  static Elf_Sym *find_nearest_sym(struct elf_info *elf, Elf_Addr addr,
 {
 	Elf_Sym *sym;
 	Elf_Sym *near = NULL;
-	Elf_Addr distance;
+	Elf_Addr sym_addr, distance;
+	bool is_arm = (elf->hdr->e_machine == EM_ARM);
 
 	for (sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
 		if (get_secindex(elf, sym) != secndx)
@@ -1090,10 +1091,19 @@  static Elf_Sym *find_nearest_sym(struct elf_info *elf, Elf_Addr addr,
 		if (!is_valid_name(elf, sym))
 			continue;
 
-		if (addr >= sym->st_value)
-			distance = addr - sym->st_value;
+		sym_addr = sym->st_value;
+
+		/*
+		 * For ARM Thumb instruction, the bit 0 of st_value is set.
+		 * Mask it to get the address.
+		 */
+		if (is_arm)
+			 sym_addr &= ~1;
+
+		if (addr >= sym_addr)
+			distance = addr - sym_addr;
 		else if (allow_negative)
-			distance = sym->st_value - addr;
+			distance = sym_addr - addr;
 		else
 			continue;
 
@@ -1266,7 +1276,7 @@  static int addend_arm_rel(struct elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r)
 	unsigned int r_typ = ELF_R_TYPE(r->r_info);
 	Elf_Sym *sym = elf->symtab_start + ELF_R_SYM(r->r_info);
 	void *loc = reloc_location(elf, sechdr, r);
-	uint32_t inst;
+	uint32_t inst, upper, lower;
 	int32_t offset;
 
 	switch (r_typ) {
@@ -1288,6 +1298,17 @@  static int addend_arm_rel(struct elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r)
 		offset = sign_extend32((inst & 0x00ffffff) << 2, 25);
 		r->r_addend = offset + sym->st_value + 8;
 		break;
+	case R_ARM_THM_MOVW_ABS_NC:
+	case R_ARM_THM_MOVT_ABS:
+		upper = TO_NATIVE(*(uint16_t *)loc);
+		lower = TO_NATIVE(*((uint16_t *)loc + 1));
+		offset = sign_extend32(((upper & 0x000f) << 12) |
+				       ((upper & 0x0400) << 1) |
+				       ((lower & 0x7000) >> 4) |
+				       (lower & 0x00ff),
+				       15);
+		r->r_addend = offset + sym->st_value;
+		break;
 	case R_ARM_THM_CALL:
 	case R_ARM_THM_JUMP24:
 	case R_ARM_THM_JUMP19: