[v5,08/20] x86/decompressor: Use standard calling convention for trampoline

Message ID 20230607072342.4054036-9-ardb@kernel.org
State New
Headers
Series efi/x86: Avoid bare metal decompressor during EFI boot |

Commit Message

Ard Biesheuvel June 7, 2023, 7:23 a.m. UTC
  Update the trampoline code so its arguments are passed via RDI and RSI,
which matches the ordinary SysV calling convention for x86_64. This will
allow this code to be called directly from C.

Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/boot/compressed/head_64.S | 30 +++++++++-----------
 arch/x86/boot/compressed/pgtable.h |  2 +-
 2 files changed, 14 insertions(+), 18 deletions(-)
  

Comments

Yunhong Jiang June 7, 2023, 7:38 p.m. UTC | #1
On Wed, Jun 07, 2023 at 09:23:30AM +0200, Ard Biesheuvel wrote:
> Update the trampoline code so its arguments are passed via RDI and RSI,
> which matches the ordinary SysV calling convention for x86_64. This will
> allow this code to be called directly from C.
> 
> Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> ---
>  arch/x86/boot/compressed/head_64.S | 30 +++++++++-----------
>  arch/x86/boot/compressed/pgtable.h |  2 +-
>  2 files changed, 14 insertions(+), 18 deletions(-)
> 
> diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
> index af45ddd8297a4a07..a387cd80964e1a1e 100644
> --- a/arch/x86/boot/compressed/head_64.S
> +++ b/arch/x86/boot/compressed/head_64.S
> @@ -443,9 +443,9 @@ SYM_CODE_START(startup_64)
>  	movq	%r15, %rdi		/* pass struct boot_params pointer */
>  	call	paging_prepare
>  
> -	/* Save the trampoline address in RCX */
> -	movq	%rax, %rcx
> -
> +	/* Pass the trampoline address and boolean flag as args #1 and #2 */
> +	movq	%rax, %rdi
> +	movq	%rdx, %rsi
>  	leaq	TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
>  	call	*%rax
>  
> @@ -534,11 +534,11 @@ SYM_FUNC_END(.Lrelocated)
>  /*
>   * This is the 32-bit trampoline that will be copied over to low memory.
>   *
> - * ECX contains the base address of the trampoline memory.
> - * Non zero RDX means trampoline needs to enable 5-level paging.
> + * EDI contains the base address of the trampoline memory.
> + * Non-zero ESI means trampoline needs to enable 5-level paging.
>   */
>  SYM_CODE_START(trampoline_32bit_src)

After the whole patchset, this function now only switches the paging level; is my
understanding correct? After all, it's converted to toggle_la57 directly in the
following patches. If that's the case, would it make sense to rename it
correspondingly?

Also, to align with toggle_la57, could we make the first parameter just the
page table, instead of the trampoline memory address?

> -	popq	%rdi
> +	popq	%r8
>  	/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
>  	pushq	$__KERNEL32_CS
>  	leaq	0f(%rip), %rax
> @@ -552,7 +552,7 @@ SYM_CODE_START(trampoline_32bit_src)
>  	movl	%eax, %ss
>  
>  	/* Set up new stack */
> -	leal	TRAMPOLINE_32BIT_STACK_END(%ecx), %esp
> +	leal	TRAMPOLINE_32BIT_STACK_END(%edi), %esp
>  
>  	/* Disable paging */
>  	movl	%cr0, %eax
> @@ -560,7 +560,7 @@ SYM_CODE_START(trampoline_32bit_src)
>  	movl	%eax, %cr0
>  
>  	/* Check what paging mode we want to be in after the trampoline */
> -	testl	%edx, %edx
> +	testl	%esi, %esi
>  	jz	1f
>  
>  	/* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */
> @@ -575,21 +575,17 @@ SYM_CODE_START(trampoline_32bit_src)
>  	jz	3f
>  2:
>  	/* Point CR3 to the trampoline's new top level page table */
> -	leal	TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax
> +	leal	TRAMPOLINE_32BIT_PGTABLE_OFFSET(%edi), %eax
>  	movl	%eax, %cr3
>  3:
>  	/* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */
> -	pushl	%ecx
> -	pushl	%edx
>  	movl	$MSR_EFER, %ecx
>  	rdmsr
>  	btsl	$_EFER_LME, %eax
>  	/* Avoid writing EFER if no change was made (for TDX guest) */
>  	jc	1f
>  	wrmsr
> -1:	popl	%edx
> -	popl	%ecx
> -
> +1:
>  #ifdef CONFIG_X86_MCE
>  	/*
>  	 * Preserve CR4.MCE if the kernel will enable #MC support.
> @@ -606,14 +602,14 @@ SYM_CODE_START(trampoline_32bit_src)
>  
>  	/* Enable PAE and LA57 (if required) paging modes */
>  	orl	$X86_CR4_PAE, %eax
> -	testl	%edx, %edx
> +	testl	%esi, %esi
>  	jz	1f
>  	orl	$X86_CR4_LA57, %eax
>  1:
>  	movl	%eax, %cr4
>  
>  	/* Calculate address of paging_enabled() once we are executing in the trampoline */
> -	leal	.Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax
> +	leal	.Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%edi), %eax
>  
>  	/* Prepare the stack for far return to Long Mode */
>  	pushl	$__KERNEL_CS
> @@ -630,7 +626,7 @@ SYM_CODE_END(trampoline_32bit_src)
>  	.code64
>  SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled)
>  	/* Return from the trampoline */
> -	jmp	*%rdi
> +	jmp	*%r8
>  SYM_FUNC_END(.Lpaging_enabled)
>  
>  	/*
> diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h
> index 91dbb99203fbce2d..4e8cef135226bcbb 100644
> --- a/arch/x86/boot/compressed/pgtable.h
> +++ b/arch/x86/boot/compressed/pgtable.h
> @@ -14,7 +14,7 @@
>  
>  extern unsigned long *trampoline_32bit;
>  
> -extern void trampoline_32bit_src(void *return_ptr);
> +extern void trampoline_32bit_src(void *trampoline, bool enable_5lvl);
>  
>  #endif /* __ASSEMBLER__ */
>  #endif /* BOOT_COMPRESSED_PAGETABLE_H */
> -- 
> 2.39.2
>
  
Ard Biesheuvel June 7, 2023, 8:07 p.m. UTC | #2
On Wed, 7 Jun 2023 at 21:38, Yunhong Jiang
<yunhong.jiang@linux.intel.com> wrote:
>
> On Wed, Jun 07, 2023 at 09:23:30AM +0200, Ard Biesheuvel wrote:
> > Update the trampoline code so its arguments are passed via RDI and RSI,
> > which matches the ordinary SysV calling convention for x86_64. This will
> > allow this code to be called directly from C.
> >
> > Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> > Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> > ---
> >  arch/x86/boot/compressed/head_64.S | 30 +++++++++-----------
> >  arch/x86/boot/compressed/pgtable.h |  2 +-
> >  2 files changed, 14 insertions(+), 18 deletions(-)
> >
> > diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
> > index af45ddd8297a4a07..a387cd80964e1a1e 100644
> > --- a/arch/x86/boot/compressed/head_64.S
> > +++ b/arch/x86/boot/compressed/head_64.S
> > @@ -443,9 +443,9 @@ SYM_CODE_START(startup_64)
> >       movq    %r15, %rdi              /* pass struct boot_params pointer */
> >       call    paging_prepare
> >
> > -     /* Save the trampoline address in RCX */
> > -     movq    %rax, %rcx
> > -
> > +     /* Pass the trampoline address and boolean flag as args #1 and #2 */
> > +     movq    %rax, %rdi
> > +     movq    %rdx, %rsi
> >       leaq    TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
> >       call    *%rax
> >
> > @@ -534,11 +534,11 @@ SYM_FUNC_END(.Lrelocated)
> >  /*
> >   * This is the 32-bit trampoline that will be copied over to low memory.
> >   *
> > - * ECX contains the base address of the trampoline memory.
> > - * Non zero RDX means trampoline needs to enable 5-level paging.
> > + * EDI contains the base address of the trampoline memory.
> > + * Non-zero ESI means trampoline needs to enable 5-level paging.
> >   */
> >  SYM_CODE_START(trampoline_32bit_src)
>
> After the whole patchset, this function now only switch the paging level, is my
> understanding correct? After all, it's converted to toggle_la57 directly in the
> followed patches. If that's the case, would it makes sense to rename it
> correspondingly?
>

This is template code that is copied to a 32-bit addressable buffer
and called there.

> Also, to align with the toggle_la57, would we make the first parameter as just
> page table, instead of trampoline memory address?
>

Sure, but instead of rewriting this code from scratch, I decided to
make incremental changes to it, for easier review and bisect.

Of course, we could change the name, change the prototype, and another
thing we might do is drop the second argument, which is redundant now
that we always toggle and never preserve the existing value of LA57.

However, this was not necessary for making the code reusable from the
EFI stub, so I left it for further cleanup.

> > -     popq    %rdi
> > +     popq    %r8
> >       /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
> >       pushq   $__KERNEL32_CS
> >       leaq    0f(%rip), %rax
> > @@ -552,7 +552,7 @@ SYM_CODE_START(trampoline_32bit_src)
> >       movl    %eax, %ss
> >
> >       /* Set up new stack */
> > -     leal    TRAMPOLINE_32BIT_STACK_END(%ecx), %esp
> > +     leal    TRAMPOLINE_32BIT_STACK_END(%edi), %esp
> >
> >       /* Disable paging */
> >       movl    %cr0, %eax
> > @@ -560,7 +560,7 @@ SYM_CODE_START(trampoline_32bit_src)
> >       movl    %eax, %cr0
> >
> >       /* Check what paging mode we want to be in after the trampoline */
> > -     testl   %edx, %edx
> > +     testl   %esi, %esi
> >       jz      1f
> >
> >       /* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */
> > @@ -575,21 +575,17 @@ SYM_CODE_START(trampoline_32bit_src)
> >       jz      3f
> >  2:
> >       /* Point CR3 to the trampoline's new top level page table */
> > -     leal    TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax
> > +     leal    TRAMPOLINE_32BIT_PGTABLE_OFFSET(%edi), %eax
> >       movl    %eax, %cr3
> >  3:
> >       /* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */
> > -     pushl   %ecx
> > -     pushl   %edx
> >       movl    $MSR_EFER, %ecx
> >       rdmsr
> >       btsl    $_EFER_LME, %eax
> >       /* Avoid writing EFER if no change was made (for TDX guest) */
> >       jc      1f
> >       wrmsr
> > -1:   popl    %edx
> > -     popl    %ecx
> > -
> > +1:
> >  #ifdef CONFIG_X86_MCE
> >       /*
> >        * Preserve CR4.MCE if the kernel will enable #MC support.
> > @@ -606,14 +602,14 @@ SYM_CODE_START(trampoline_32bit_src)
> >
> >       /* Enable PAE and LA57 (if required) paging modes */
> >       orl     $X86_CR4_PAE, %eax
> > -     testl   %edx, %edx
> > +     testl   %esi, %esi
> >       jz      1f
> >       orl     $X86_CR4_LA57, %eax
> >  1:
> >       movl    %eax, %cr4
> >
> >       /* Calculate address of paging_enabled() once we are executing in the trampoline */
> > -     leal    .Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax
> > +     leal    .Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%edi), %eax
> >
> >       /* Prepare the stack for far return to Long Mode */
> >       pushl   $__KERNEL_CS
> > @@ -630,7 +626,7 @@ SYM_CODE_END(trampoline_32bit_src)
> >       .code64
> >  SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled)
> >       /* Return from the trampoline */
> > -     jmp     *%rdi
> > +     jmp     *%r8
> >  SYM_FUNC_END(.Lpaging_enabled)
> >
> >       /*
> > diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h
> > index 91dbb99203fbce2d..4e8cef135226bcbb 100644
> > --- a/arch/x86/boot/compressed/pgtable.h
> > +++ b/arch/x86/boot/compressed/pgtable.h
> > @@ -14,7 +14,7 @@
> >
> >  extern unsigned long *trampoline_32bit;
> >
> > -extern void trampoline_32bit_src(void *return_ptr);
> > +extern void trampoline_32bit_src(void *trampoline, bool enable_5lvl);
> >
> >  #endif /* __ASSEMBLER__ */
> >  #endif /* BOOT_COMPRESSED_PAGETABLE_H */
> > --
> > 2.39.2
> >
  

Patch

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index af45ddd8297a4a07..a387cd80964e1a1e 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -443,9 +443,9 @@  SYM_CODE_START(startup_64)
 	movq	%r15, %rdi		/* pass struct boot_params pointer */
 	call	paging_prepare
 
-	/* Save the trampoline address in RCX */
-	movq	%rax, %rcx
-
+	/* Pass the trampoline address and boolean flag as args #1 and #2 */
+	movq	%rax, %rdi
+	movq	%rdx, %rsi
 	leaq	TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
 	call	*%rax
 
@@ -534,11 +534,11 @@  SYM_FUNC_END(.Lrelocated)
 /*
  * This is the 32-bit trampoline that will be copied over to low memory.
  *
- * ECX contains the base address of the trampoline memory.
- * Non zero RDX means trampoline needs to enable 5-level paging.
+ * EDI contains the base address of the trampoline memory.
+ * Non-zero ESI means trampoline needs to enable 5-level paging.
  */
 SYM_CODE_START(trampoline_32bit_src)
-	popq	%rdi
+	popq	%r8
 	/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
 	pushq	$__KERNEL32_CS
 	leaq	0f(%rip), %rax
@@ -552,7 +552,7 @@  SYM_CODE_START(trampoline_32bit_src)
 	movl	%eax, %ss
 
 	/* Set up new stack */
-	leal	TRAMPOLINE_32BIT_STACK_END(%ecx), %esp
+	leal	TRAMPOLINE_32BIT_STACK_END(%edi), %esp
 
 	/* Disable paging */
 	movl	%cr0, %eax
@@ -560,7 +560,7 @@  SYM_CODE_START(trampoline_32bit_src)
 	movl	%eax, %cr0
 
 	/* Check what paging mode we want to be in after the trampoline */
-	testl	%edx, %edx
+	testl	%esi, %esi
 	jz	1f
 
 	/* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */
@@ -575,21 +575,17 @@  SYM_CODE_START(trampoline_32bit_src)
 	jz	3f
 2:
 	/* Point CR3 to the trampoline's new top level page table */
-	leal	TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax
+	leal	TRAMPOLINE_32BIT_PGTABLE_OFFSET(%edi), %eax
 	movl	%eax, %cr3
 3:
 	/* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */
-	pushl	%ecx
-	pushl	%edx
 	movl	$MSR_EFER, %ecx
 	rdmsr
 	btsl	$_EFER_LME, %eax
 	/* Avoid writing EFER if no change was made (for TDX guest) */
 	jc	1f
 	wrmsr
-1:	popl	%edx
-	popl	%ecx
-
+1:
 #ifdef CONFIG_X86_MCE
 	/*
 	 * Preserve CR4.MCE if the kernel will enable #MC support.
@@ -606,14 +602,14 @@  SYM_CODE_START(trampoline_32bit_src)
 
 	/* Enable PAE and LA57 (if required) paging modes */
 	orl	$X86_CR4_PAE, %eax
-	testl	%edx, %edx
+	testl	%esi, %esi
 	jz	1f
 	orl	$X86_CR4_LA57, %eax
 1:
 	movl	%eax, %cr4
 
 	/* Calculate address of paging_enabled() once we are executing in the trampoline */
-	leal	.Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax
+	leal	.Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%edi), %eax
 
 	/* Prepare the stack for far return to Long Mode */
 	pushl	$__KERNEL_CS
@@ -630,7 +626,7 @@  SYM_CODE_END(trampoline_32bit_src)
 	.code64
 SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled)
 	/* Return from the trampoline */
-	jmp	*%rdi
+	jmp	*%r8
 SYM_FUNC_END(.Lpaging_enabled)
 
 	/*
diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h
index 91dbb99203fbce2d..4e8cef135226bcbb 100644
--- a/arch/x86/boot/compressed/pgtable.h
+++ b/arch/x86/boot/compressed/pgtable.h
@@ -14,7 +14,7 @@ 
 
 extern unsigned long *trampoline_32bit;
 
-extern void trampoline_32bit_src(void *return_ptr);
+extern void trampoline_32bit_src(void *trampoline, bool enable_5lvl);
 
 #endif /* __ASSEMBLER__ */
 #endif /* BOOT_COMPRESSED_PAGETABLE_H */