[1/2] x86/boot: robustify calling startup_{32,64}() from the decompressor code

Message ID 20221031151047.167288-2-alexandr.lobakin@intel.com
State New
Series x86/boot: fix relying on link order

Commit Message

Alexander Lobakin Oct. 31, 2022, 3:10 p.m. UTC
  After commit ce697ccee1a8 ("kbuild: remove head-y syntax"), I
started digging into whether x86 is ready for removing this old cruft.
Removing its objects from the list makes the kernel unbootable.
This applies only to bzImage; vmlinux still works correctly.
The reason is that the strict object order is enforced by the
linker arguments, not by the linker script, so without it startup_64
may no longer be placed right at the beginning of the kernel.
Here's vmlinux.map's beginning before removing:

ffffffff81000000         vmlinux.o:(.head.text)
ffffffff81000000                 startup_64
ffffffff81000070                 secondary_startup_64
ffffffff81000075                 secondary_startup_64_no_verify
ffffffff81000160                 verify_cpu

and after:

ffffffff81000000         vmlinux.o:(.head.text)
ffffffff81000000                 pvh_start_xen
ffffffff81000080                 startup_64
ffffffff810000f0                 secondary_startup_64
ffffffff810000f5                 secondary_startup_64_no_verify

Not a problem in itself, but the self-extractor code hardcodes the
address of that function as the beginning of the image instead of
looking at the ELF header, which always contains the address of
startup_{32,64}().

So, instead of an "act of blind faith", just take the address
from the ELF header and derive a relative offset to the entry
point. The decompressor function already returns a pointer to the
beginning of the kernel to the Asm code, which then jumps to it,
so add that offset to the return value.
This doesn't change anything for now, but allows dropping the
"head object list" for x86 and makes sure that valid Kbuild or
any other improvements won't break anything here in general.
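
For illustration, the offset derivation boils down to roughly the
following (a minimal standalone userspace sketch, 64-bit only; the
helper name entry_offset() is made up here, the real logic lives in
parse_elf() in the patch below):

#include <elf.h>
#include <stddef.h>
#include <string.h>

/*
 * Sketch: given a decompressed vmlinux image at @output, compute the
 * entry point's offset from the load address of the first program
 * header (assumed to be the first PT_LOAD segment, as in vmlinux).
 * Error handling omitted.
 */
static size_t entry_offset(const void *output)
{
	Elf64_Ehdr ehdr;
	Elf64_Phdr phdr;

	memcpy(&ehdr, output, sizeof(ehdr));
	memcpy(&phdr, (const char *)output + ehdr.e_phoff, sizeof(phdr));

	/* e_entry always points at startup_64(), wherever it was linked */
	return ehdr.e_entry - phdr.p_paddr;
}

In the patch, extract_kernel() adds this offset to its return value,
so the Asm stub jumps straight to startup_{32,64}().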

Signed-off-by: Alexander Lobakin <alexandr.lobakin@intel.com>
---
 arch/x86/boot/compressed/head_32.S |  2 +-
 arch/x86/boot/compressed/head_64.S |  2 +-
 arch/x86/boot/compressed/misc.c    | 16 +++++++++++-----
 3 files changed, 13 insertions(+), 7 deletions(-)
  

Comments

H. Peter Anvin Oct. 31, 2022, 8:31 p.m. UTC | #1
On October 31, 2022 8:10:46 AM PDT, Alexander Lobakin <alexandr.lobakin@intel.com> wrote:
>After commit ce697ccee1a8 ("kbuild: remove head-y syntax"), I
>started digging into whether x86 is ready for removing this old cruft.

[...]

Put the entry points in dedicated sections and declare them explicitly in the linker script (the standard name for this section would be .init, but we presumably want .init.32 and .init.64 to keep 32- and 64-bit code in separate sections.)
  
Jiri Slaby Nov. 1, 2022, 6:47 a.m. UTC | #2
On 31. 10. 22, 16:10, Alexander Lobakin wrote:
> After commit ce697ccee1a8 ("kbuild: remove head-y syntax"), I
> started digging into whether x86 is ready for removing this old cruft.

[...]

Oh yeah! I wouldn't have thought that implementing this would be _that_ easy.

The next natural step would be to eliminate the whole head section. But 
that would need a bit more work as not all jumps are rip-relative, 
apparently...

A few comments below, so no Reviewed-by yet.

Tested-by: Jiri Slaby <jirislaby@kernel.org>

> Signed-off-by: Alexander Lobakin <alexandr.lobakin@intel.com>
...
> --- a/arch/x86/boot/compressed/misc.c
> +++ b/arch/x86/boot/compressed/misc.c
> @@ -277,7 +277,7 @@ static inline void handle_relocations(void *output, unsigned long output_len,
>   { }
>   #endif
>   
> -static void parse_elf(void *output)
> +static size_t parse_elf(void *output)
>   {
>   #ifdef CONFIG_X86_64
>   	Elf64_Ehdr ehdr;
> @@ -287,6 +287,7 @@ static void parse_elf(void *output)
>   	Elf32_Phdr *phdrs, *phdr;
>   #endif
>   	void *dest;
> +	size_t off;
>   	int i;
>   
>   	memcpy(&ehdr, output, sizeof(ehdr));
> @@ -295,16 +296,19 @@ static void parse_elf(void *output)
>   	   ehdr.e_ident[EI_MAG2] != ELFMAG2 ||
>   	   ehdr.e_ident[EI_MAG3] != ELFMAG3) {
>   		error("Kernel is not a valid ELF file");
> -		return;
> +		return 0;

error() is noreturn, so you can remove these returns. They don't make 
sense anyway. Likely in a separate patch.
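
I.e., roughly this (a sketch of the suggested cleanup, not an actual
replacement hunk; it assumes error() is annotated as noreturn so the
compiler doesn't warn about the missing return value; only the two
error sites are shown):

	if (ehdr.e_ident[EI_MAG0] != ELFMAG0 ||
	    ehdr.e_ident[EI_MAG1] != ELFMAG1 ||
	    ehdr.e_ident[EI_MAG2] != ELFMAG2 ||
	    ehdr.e_ident[EI_MAG3] != ELFMAG3)
		error("Kernel is not a valid ELF file");

	phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum);
	if (!phdrs)
		error("Failed to allocate space for phdrs");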

>   	}
>   
>   	debug_putstr("Parsing ELF... ");
>   
>   	phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum);
> -	if (!phdrs)
> +	if (!phdrs) {
>   		error("Failed to allocate space for phdrs");
> +		return 0;
> +	}
>   
>   	memcpy(phdrs, output + ehdr.e_phoff, sizeof(*phdrs) * ehdr.e_phnum);
> +	off = ehdr.e_entry - phdrs->p_paddr;
>   
>   	for (i = 0; i < ehdr.e_phnum; i++) {
>   		phdr = &phdrs[i];
> @@ -328,6 +332,7 @@ static void parse_elf(void *output)
>   	}
>   
>   	free(phdrs);
> +	return off;

You should add a \n before the return.

>   }
>   
>   /*
> @@ -356,6 +361,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
>   	const unsigned long kernel_total_size = VO__end - VO__text;
>   	unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
>   	unsigned long needed_size;
> +	size_t off;
>   
>   	/* Retain x86 boot parameters pointer passed from startup_32/64. */
>   	boot_params = rmode;
> @@ -456,14 +462,14 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
>   	debug_putstr("\nDecompressing Linux... ");
>   	__decompress(input_data, input_len, NULL, NULL, output, output_len,
>   			NULL, error);
> -	parse_elf(output);
> +	off = parse_elf(output);

Perhaps add:
   debug_putaddr(off);
here?

>   	handle_relocations(output, output_len, virt_addr);
>   	debug_putstr("done.\nBooting the kernel.\n");
>   
>   	/* Disable exception handling before booting the kernel */
>   	cleanup_exception_handling();
>   
> -	return output;
> +	return output + off;
>   }
>   
>   void fortify_panic(const char *name)

thanks,
  
Jiri Slaby Nov. 1, 2022, 6:50 a.m. UTC | #3
On 01. 11. 22, 7:47, Jiri Slaby wrote:
> Tested-by: Jiri Slaby <jirislaby@kernel.org>

FWIW that means: tested with gcc-lto, which deliberately places
startup_64() at a random location rather than at the beginning of
vmlinux.
  
Alexander Lobakin Nov. 1, 2022, 3:38 p.m. UTC | #4
From: Jiri Slaby <jirislaby@kernel.org>
Date: Tue, 1 Nov 2022 07:50:04 +0100

> On 01. 11. 22, 7:47, Jiri Slaby wrote:
> > Tested-by: Jiri Slaby <jirislaby@kernel.org>
> 
> FWIW that means: tested with gcc-lto, which deliberately places
> startup_64() at a random location rather than at the beginning of
> vmlinux.

Yeah, you never know. After rebasing my FG-KASLR series on top of
6.1-rc1, it stopped booting; I found out that the combo of that
head-object-list.txt + -ffunction-sections (but for Asm) also made
startup_64() appear in random parts of .head.text.

Re head removal -- that would be awesome, but from what I've tried
during the development, there's a lot of stuff to be redone; the
compressor code sometimes just doesn't want to work if you play
with .head.text too much. We'll see.

> 
> -- 
> js
> suse labs

Thanks,
Olek
  
Alexander Lobakin Nov. 1, 2022, 3:44 p.m. UTC | #5
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 31 Oct 2022 13:31:05 -0700

> On October 31, 2022 8:10:46 AM PDT, Alexander Lobakin <alexandr.lobakin@intel.com> wrote:
> >After commit ce697ccee1a8 ("kbuild: remove head-y syntax"), I
> >started digging into whether x86 is ready for removing this old cruft.
> >Removing its objects from the list makes the kernel unbootable.
> >This applies only to bzImage; vmlinux still works correctly.
> >The reason is that the strict object order is enforced by the
> >linker arguments, not by the linker script, so without it startup_64
> >may no longer be placed right at the beginning of the kernel.
> >Here's vmlinux.map's beginning before removing:

[...]

> >-	return output;
> >+	return output + off;
> > }
> > 
> > void fortify_panic(const char *name)
> 
> Put the entry points in dedicated sections and declare them explicitly in the linker script (the standard name for this section would be .init, but we presumably want .init.32 and .init.64 to keep 32- and 64-bit code in separate sections.)

Uhm, I'm not sure I follow. We're trying to eliminate as much
hardcoding as possible, and now you propose to introduce more :P
Some architectures don't have a separate .head.text at all: startup
functions are placed in .init.text, the preboot code finds the entry
point, and it just works. It's not possible on x86_64 currently, but
we'll get there one day.

Thanks,
Olek
  

Patch

diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 3b354eb9516d..56f9847e208b 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -187,7 +187,7 @@  SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
 	leal	boot_heap@GOTOFF(%ebx), %eax
 	pushl	%eax			/* heap area */
 	pushl	%esi			/* real mode pointer */
-	call	extract_kernel		/* returns kernel location in %eax */
+	call	extract_kernel		/* returns kernel entry point in %eax */
 	addl	$24, %esp
 
 /*
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index d33f060900d2..aeba5aa3d26c 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -593,7 +593,7 @@  SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
 	movl	input_len(%rip), %ecx	/* input_len */
 	movq	%rbp, %r8		/* output target address */
 	movl	output_len(%rip), %r9d	/* decompressed length, end of relocs */
-	call	extract_kernel		/* returns kernel location in %rax */
+	call	extract_kernel		/* returns kernel entry point in %rax */
 	popq	%rsi
 
 /*
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index cf690d8712f4..96ae5af29f49 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -277,7 +277,7 @@  static inline void handle_relocations(void *output, unsigned long output_len,
 { }
 #endif
 
-static void parse_elf(void *output)
+static size_t parse_elf(void *output)
 {
 #ifdef CONFIG_X86_64
 	Elf64_Ehdr ehdr;
@@ -287,6 +287,7 @@  static void parse_elf(void *output)
 	Elf32_Phdr *phdrs, *phdr;
 #endif
 	void *dest;
+	size_t off;
 	int i;
 
 	memcpy(&ehdr, output, sizeof(ehdr));
@@ -295,16 +296,19 @@  static void parse_elf(void *output)
 	   ehdr.e_ident[EI_MAG2] != ELFMAG2 ||
 	   ehdr.e_ident[EI_MAG3] != ELFMAG3) {
 		error("Kernel is not a valid ELF file");
-		return;
+		return 0;
 	}
 
 	debug_putstr("Parsing ELF... ");
 
 	phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum);
-	if (!phdrs)
+	if (!phdrs) {
 		error("Failed to allocate space for phdrs");
+		return 0;
+	}
 
 	memcpy(phdrs, output + ehdr.e_phoff, sizeof(*phdrs) * ehdr.e_phnum);
+	off = ehdr.e_entry - phdrs->p_paddr;
 
 	for (i = 0; i < ehdr.e_phnum; i++) {
 		phdr = &phdrs[i];
@@ -328,6 +332,7 @@  static void parse_elf(void *output)
 	}
 
 	free(phdrs);
+	return off;
 }
 
 /*
@@ -356,6 +361,7 @@  asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 	const unsigned long kernel_total_size = VO__end - VO__text;
 	unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
 	unsigned long needed_size;
+	size_t off;
 
 	/* Retain x86 boot parameters pointer passed from startup_32/64. */
 	boot_params = rmode;
@@ -456,14 +462,14 @@  asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 	debug_putstr("\nDecompressing Linux... ");
 	__decompress(input_data, input_len, NULL, NULL, output, output_len,
 			NULL, error);
-	parse_elf(output);
+	off = parse_elf(output);
 	handle_relocations(output, output_len, virt_addr);
 	debug_putstr("done.\nBooting the kernel.\n");
 
 	/* Disable exception handling before booting the kernel */
 	cleanup_exception_handling();
 
-	return output;
+	return output + off;
 }
 
 void fortify_panic(const char *name)