[v5,10/16] x86/startup_64: Simplify virtual switch on primary boot

Message ID: 20240221113506.2565718-28-ardb+git@google.com
State: New
Series: x86: Confine early 1:1 mapped startup code

Commit Message

Ard Biesheuvel Feb. 21, 2024, 11:35 a.m. UTC
  From: Ard Biesheuvel <ardb@kernel.org>

The secondary startup code is used on the primary boot path as well, but
in this case, the initial part runs from a 1:1 mapping, until an
explicit cross-jump is made to the kernel virtual mapping of the same
code.

On the secondary boot path, this jump is pointless as the code already
executes from the mapping targeted by the jump. So combine this
cross-jump with the jump from startup_64() into the common boot path.
This simplifies the execution flow, and clearly separates code that runs
from a 1:1 mapping from code that runs from the kernel virtual mapping.
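
For reference, the cross-jump being removed is the load-immediate-and-branch
sequence on the secondary path (taken verbatim from the hunk below), which
loads the virtual address of a local label and jumps to it through a register:

	/* Ensure I am executing from virtual addresses */
	movq	$1f, %rax
	ANNOTATE_RETPOLINE_SAFE
	jmp	*%rax
1: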

Note that this requires a page table switch, so hoist the CR3 assignment
into startup_64() as well. And since absolute symbol references will no
longer be permitted in .head.text once we enable the associated build
time checks, a RIP-relative memory operand is used in the JMP
instruction, referring to an absolute constant in the .init.rodata
section.
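
Concretely, startup_64() now ends with an indirect jump through a
RIP-relative reference to a .quad carrying the virtual address of the common
entry point (see the diff below):

	ANNOTATE_RETPOLINE_SAFE
	jmp	*0f(%rip)
SYM_CODE_END(startup_64)

	__INITRODATA
0:	.quad	common_startup_64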

Given that the secondary startup code does not require a special
placement inside the executable, move it to the .noinstr.text section.
This requires the use of a subsection so that the payload is placed
after the page aligned Xen hypercall page, as otherwise, objtool will
complain about the resulting JMP instruction emitted by the assembler
being unreachable.
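
That is, the code is now emitted as

	.section .noinstr.text, "ax"
	.subsection 1
SYM_CODE_START(secondary_startup_64)

so that it is placed after the contents of subsection 0, which include the
page aligned hypercall page.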

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/kernel/head64.c  |  2 +-
 arch/x86/kernel/head_64.S | 43 ++++++++++----------
 2 files changed, 23 insertions(+), 22 deletions(-)
  

Comments

Ard Biesheuvel Feb. 23, 2024, 1:11 p.m. UTC | #1
On Wed, 21 Feb 2024 at 12:36, Ard Biesheuvel <ardb+git@google.com> wrote:
>
> From: Ard Biesheuvel <ardb@kernel.org>
>
> The secondary startup code is used on the primary boot path as well, but
> in this case, the initial part runs from a 1:1 mapping, until an
> explicit cross-jump is made to the kernel virtual mapping of the same
> code.
>
> On the secondary boot path, this jump is pointless as the code already
> executes from the mapping targeted by the jump. So combine this
> cross-jump with the jump from startup_64() into the common boot path.
> This simplifies the execution flow, and clearly separates code that runs
> from a 1:1 mapping from code that runs from the kernel virtual mapping.
>
> Note that this requires a page table switch, so hoist the CR3 assignment
> into startup_64() as well. And since absolute symbol references will no
> longer be permitted in .head.text once we enable the associated build
> time checks, a RIP-relative memory operand is used in the JMP
> instruction, referring to an absolute constant in the .init.rodata
> section.
>
> Given that the secondary startup code does not require a special
> placement inside the executable, move it to the .noinstr.text section.

This should be the .text section, or we get

vmlinux.o: warning: objtool: early_setup_idt+0x4: call to
startup_64_load_idt() leaves .noinstr.text section

It would be better to have the secondary startup code in
.noinstr.text, but that is only a very minor improvement. I'll be
looking into teaching objtool to be strict about .head.text code and
reject references that use absolute addressing, so I might be able to
tweak it to permit this use case as well, but at this point we should
probably just move it into ordinary .text.


> This requires the use of a subsection so that the payload is placed
> after the page aligned Xen hypercall page, as otherwise, objtool will
> complain about the resulting JMP instruction emitted by the assembler
> being unreachable.
>

^^^ This bit can be dropped, and the following hunk applied (apologies
if my Gmail mangles this; I can resend the patch or the series):


diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 4bee33d8e1dc..e16df01791be 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -515,7 +515,7 @@
 }

 /* This is used when running on kernel addresses */
-void noinstr early_setup_idt(void)
+void early_setup_idt(void)
 {
        void *handler = NULL;

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 32fefb23f4df..c9ee92550508 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -139,8 +139,7 @@
        __INITRODATA
 0:     .quad   common_startup_64

-       .section .noinstr.text, "ax"
-       .subsection 1
+       .text
 SYM_CODE_START(secondary_startup_64)
        UNWIND_HINT_END_OF_STACK
        ANNOTATE_NOENDBR
  

Patch

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index deaaea3280d9..0b827cbf6ee4 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -553,7 +553,7 @@  static void __head startup_64_load_idt(void *vc_handler)
 }
 
 /* This is used when running on kernel addresses */
-void early_setup_idt(void)
+void noinstr early_setup_idt(void)
 {
 	void *handler = NULL;
 
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index b92031d7e006..03268bf0214a 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -39,7 +39,6 @@  L4_START_KERNEL = l4_index(__START_KERNEL_map)
 
 L3_START_KERNEL = pud_index(__START_KERNEL_map)
 
-	.text
 	__HEAD
 	.code64
 SYM_CODE_START_NOALIGN(startup_64)
@@ -126,9 +125,22 @@  SYM_CODE_START_NOALIGN(startup_64)
 	call	sev_verify_cbit
 #endif
 
-	jmp 1f
+	/*
+	 * Switch to early_top_pgt which still has the identity mappings
+	 * present.
+	 */
+	movq	%rax, %cr3
+
+	/* Branch to the common startup code at its kernel virtual address */
+	ANNOTATE_RETPOLINE_SAFE
+	jmp	*0f(%rip)
 SYM_CODE_END(startup_64)
 
+	__INITRODATA
+0:	.quad	common_startup_64
+
+	.section .noinstr.text, "ax"
+	.subsection 1
 SYM_CODE_START(secondary_startup_64)
 	UNWIND_HINT_END_OF_STACK
 	ANNOTATE_NOENDBR
@@ -174,8 +186,15 @@  SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 	addq	sme_me_mask(%rip), %rax
 #endif
+	/*
+	 * Switch to the init_top_pgt here, away from the trampoline_pgd and
+	 * unmap the identity mapped ranges.
+	 */
+	movq	%rax, %cr3
 
-1:
+SYM_INNER_LABEL(common_startup_64, SYM_L_LOCAL)
+	UNWIND_HINT_END_OF_STACK
+	ANNOTATE_NOENDBR
 
 	/* Create a mask of CR4 bits to preserve */
 	movl	$(X86_CR4_PAE | X86_CR4_LA57), %edx
@@ -196,16 +215,6 @@  SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 	btsl	$X86_CR4_PSE_BIT, %ecx
 	movq	%rcx, %cr4
 
-	/*
-	 * Switch to new page-table
-	 *
-	 * For the boot CPU this switches to early_top_pgt which still has the
-	 * identity mappings present. The secondary CPUs will switch to the
-	 * init_top_pgt here, away from the trampoline_pgd and unmap the
-	 * identity mapped ranges.
-	 */
-	movq	%rax, %cr3
-
 	/*
 	 * Do a global TLB flush after the CR3 switch to make sure the TLB
 	 * entries from the identity mapping are flushed.
@@ -213,14 +222,6 @@  SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 	btsl	$X86_CR4_PGE_BIT, %ecx
 	movq	%rcx, %cr4
 
-	/* Ensure I am executing from virtual addresses */
-	movq	$1f, %rax
-	ANNOTATE_RETPOLINE_SAFE
-	jmp	*%rax
-1:
-	UNWIND_HINT_END_OF_STACK
-	ANNOTATE_NOENDBR // above
-
 #ifdef CONFIG_SMP
 	/*
 	 * For parallel boot, the APIC ID is read from the APIC, and then