[v4,11/11] x86/startup_64: Drop global variables keeping track of LA57 state
Commit Message
From: Ard Biesheuvel <ardb@kernel.org>
On x86_64, the core kernel is entered in long mode, which implies that
paging is enabled. This means that the CR4.LA57 control bit is
guaranteed to be in sync with the number of paging levels used by the
kernel, and there is no need to store this in a variable.
There is also no need to use variables for storing the calculations of
pgdir_shift and ptrs_per_p4d, as they are easily determined on the fly.
This removes the need for two different sources of truth for determining
whether 5-level paging is in use: CR4.LA57 always reflects the actual
state, and never changes from the point of view of the 64-bit core
kernel. The only potential concern is the cost of CR4 accesses, which
can be mitigated using alternatives patching based on feature detection.
Note that even the decompressor does not manipulate any page tables
before updating CR4.LA57, so it can also avoid the associated global
variables entirely. However, as it does not implement alternatives
patching, the associated ELF sections need to be discarded.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
arch/x86/boot/compressed/misc.h | 4 --
arch/x86/boot/compressed/pgtable_64.c | 12 ----
arch/x86/boot/compressed/vmlinux.lds.S | 1 +
arch/x86/include/asm/pgtable_64_types.h | 58 ++++++++------------
arch/x86/kernel/cpu/common.c | 2 -
arch/x86/kernel/head64.c | 33 +----------
arch/x86/mm/kasan_init_64.c | 3 -
arch/x86/mm/mem_encrypt_identity.c | 9 ---
8 files changed, 27 insertions(+), 95 deletions(-)
Comments
On Tue, Feb 13, 2024 at 7:42 AM Ard Biesheuvel <ardb+git@google.com> wrote:
>
> From: Ard Biesheuvel <ardb@kernel.org>
>
> On x86_64, the core kernel is entered in long mode, which implies that
> paging is enabled. This means that the CR4.LA57 control bit is
> guaranteed to be in sync with the number of paging levels used by the
> kernel, and there is no need to store this in a variable.
>
> There is also no need to use variables for storing the calculations of
> pgdir_shift and ptrs_per_p4d, as they are easily determined on the fly.
>
> This removes the need for two different sources of truth for determining
> whether 5-level paging is in use: CR4.LA57 always reflects the actual
> state, and never changes from the point of view of the 64-bit core
> kernel. The only potential concern is the cost of CR4 accesses, which
> can be mitigated using alternatives patching based on feature detection.
>
> Note that even the decompressor does not manipulate any page tables
> before updating CR4.LA57, so it can also avoid the associated global
> variables entirely. However, as it does not implement alternatives
> patching, the associated ELF sections need to be discarded.
>
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> ---
> arch/x86/boot/compressed/misc.h | 4 --
> arch/x86/boot/compressed/pgtable_64.c | 12 ----
> arch/x86/boot/compressed/vmlinux.lds.S | 1 +
> arch/x86/include/asm/pgtable_64_types.h | 58 ++++++++------------
> arch/x86/kernel/cpu/common.c | 2 -
> arch/x86/kernel/head64.c | 33 +----------
> arch/x86/mm/kasan_init_64.c | 3 -
> arch/x86/mm/mem_encrypt_identity.c | 9 ---
> 8 files changed, 27 insertions(+), 95 deletions(-)
>
> diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
> index bc2f0f17fb90..2b15ddd0e177 100644
> --- a/arch/x86/boot/compressed/misc.h
> +++ b/arch/x86/boot/compressed/misc.h
> @@ -16,9 +16,6 @@
>
> #define __NO_FORTIFY
>
> -/* cpu_feature_enabled() cannot be used this early */
> -#define USE_EARLY_PGTABLE_L5
> -
> /*
> * Boot stub deals with identity mappings, physical and virtual addresses are
> * the same, so override these defines.
> @@ -178,7 +175,6 @@ static inline int count_immovable_mem_regions(void) { return 0; }
> #endif
>
> /* ident_map_64.c */
> -extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d;
> extern void kernel_add_identity_map(unsigned long start, unsigned long end);
>
> /* Used by PAGE_KERN* macros: */
> diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
> index 51f957b24ba7..ae72f53f5e77 100644
> --- a/arch/x86/boot/compressed/pgtable_64.c
> +++ b/arch/x86/boot/compressed/pgtable_64.c
> @@ -9,13 +9,6 @@
> #define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */
> #define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */
>
> -#ifdef CONFIG_X86_5LEVEL
> -/* __pgtable_l5_enabled needs to be in .data to avoid being cleared along with .bss */
> -unsigned int __section(".data") __pgtable_l5_enabled;
> -unsigned int __section(".data") pgdir_shift = 39;
> -unsigned int __section(".data") ptrs_per_p4d = 1;
> -#endif
> -
> /* Buffer to preserve trampoline memory */
> static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
>
> @@ -125,11 +118,6 @@ asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable)
> native_cpuid_eax(0) >= 7 &&
> (native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
> l5_required = true;
> -
> - /* Initialize variables for 5-level paging */
> - __pgtable_l5_enabled = 1;
> - pgdir_shift = 48;
> - ptrs_per_p4d = 512;
> }
>
> /*
> diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
> index 083ec6d7722a..06358bb067fe 100644
> --- a/arch/x86/boot/compressed/vmlinux.lds.S
> +++ b/arch/x86/boot/compressed/vmlinux.lds.S
> @@ -81,6 +81,7 @@ SECTIONS
> *(.dynamic) *(.dynsym) *(.dynstr) *(.dynbss)
> *(.hash) *(.gnu.hash)
> *(.note.*)
> + *(.altinstructions .altinstr_replacement)
> }
>
> .got.plt (INFO) : {
> diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
> index 38b54b992f32..6a57bfdff52b 100644
> --- a/arch/x86/include/asm/pgtable_64_types.h
> +++ b/arch/x86/include/asm/pgtable_64_types.h
> @@ -6,7 +6,10 @@
>
> #ifndef __ASSEMBLY__
> #include <linux/types.h>
> +#include <asm/alternative.h>
> +#include <asm/cpufeatures.h>
> #include <asm/kaslr.h>
> +#include <asm/processor-flags.h>
>
> /*
> * These are used to make use of C type-checking..
> @@ -21,63 +24,50 @@ typedef unsigned long pgprotval_t;
> typedef struct { pteval_t pte; } pte_t;
> typedef struct { pmdval_t pmd; } pmd_t;
>
> -#ifdef CONFIG_X86_5LEVEL
> -extern unsigned int __pgtable_l5_enabled;
> -
> -#ifdef USE_EARLY_PGTABLE_L5
> -/*
> - * cpu_feature_enabled() is not available in early boot code.
> - * Use variable instead.
> - */
> -static inline bool pgtable_l5_enabled(void)
> +static __always_inline __pure bool pgtable_l5_enabled(void)
> {
> - return __pgtable_l5_enabled;
> -}
> -#else
> -#define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57)
> -#endif /* USE_EARLY_PGTABLE_L5 */
> + unsigned long r;
> + bool ret;
>
> -#else
> -#define pgtable_l5_enabled() 0
> -#endif /* CONFIG_X86_5LEVEL */
> + if (!IS_ENABLED(CONFIG_X86_5LEVEL))
> + return false;
> +
> + asm(ALTERNATIVE_TERNARY(
> + "movq %%cr4, %[reg] \n\t btl %[la57], %k[reg]" CC_SET(c),
> + %P[feat], "stc", "clc")
> + : [reg] "=&r" (r), CC_OUT(c) (ret)
> + : [feat] "i" (X86_FEATURE_LA57),
> + [la57] "i" (X86_CR4_LA57_BIT)
> + : "cc");
This should be more like _static_cpu_has(), where the runtime test is
out of line in a discardable section, and the inline part is just a
JMP or NOP.
Brian Gerst
On Wed, 14 Feb 2024 at 16:24, Brian Gerst <brgerst@gmail.com> wrote:
>
> On Tue, Feb 13, 2024 at 7:42 AM Ard Biesheuvel <ardb+git@google.com> wrote:
> >
> > From: Ard Biesheuvel <ardb@kernel.org>
> >
> > On x86_64, the core kernel is entered in long mode, which implies that
> > paging is enabled. This means that the CR4.LA57 control bit is
> > guaranteed to be in sync with the number of paging levels used by the
> > kernel, and there is no need to store this in a variable.
> >
> > There is also no need to use variables for storing the calculations of
> > pgdir_shift and ptrs_per_p4d, as they are easily determined on the fly.
> >
> > This removes the need for two different sources of truth for determining
> > whether 5-level paging is in use: CR4.LA57 always reflects the actual
> > state, and never changes from the point of view of the 64-bit core
> > kernel. The only potential concern is the cost of CR4 accesses, which
> > can be mitigated using alternatives patching based on feature detection.
> >
> > Note that even the decompressor does not manipulate any page tables
> > before updating CR4.LA57, so it can also avoid the associated global
> > variables entirely. However, as it does not implement alternatives
> > patching, the associated ELF sections need to be discarded.
> >
> > Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> > ---
> > arch/x86/boot/compressed/misc.h | 4 --
> > arch/x86/boot/compressed/pgtable_64.c | 12 ----
> > arch/x86/boot/compressed/vmlinux.lds.S | 1 +
> > arch/x86/include/asm/pgtable_64_types.h | 58 ++++++++------------
> > arch/x86/kernel/cpu/common.c | 2 -
> > arch/x86/kernel/head64.c | 33 +----------
> > arch/x86/mm/kasan_init_64.c | 3 -
> > arch/x86/mm/mem_encrypt_identity.c | 9 ---
> > 8 files changed, 27 insertions(+), 95 deletions(-)
> >
> > diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
> > index bc2f0f17fb90..2b15ddd0e177 100644
> > --- a/arch/x86/boot/compressed/misc.h
> > +++ b/arch/x86/boot/compressed/misc.h
> > @@ -16,9 +16,6 @@
> >
> > #define __NO_FORTIFY
> >
> > -/* cpu_feature_enabled() cannot be used this early */
> > -#define USE_EARLY_PGTABLE_L5
> > -
> > /*
> > * Boot stub deals with identity mappings, physical and virtual addresses are
> > * the same, so override these defines.
> > @@ -178,7 +175,6 @@ static inline int count_immovable_mem_regions(void) { return 0; }
> > #endif
> >
> > /* ident_map_64.c */
> > -extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d;
> > extern void kernel_add_identity_map(unsigned long start, unsigned long end);
> >
> > /* Used by PAGE_KERN* macros: */
> > diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
> > index 51f957b24ba7..ae72f53f5e77 100644
> > --- a/arch/x86/boot/compressed/pgtable_64.c
> > +++ b/arch/x86/boot/compressed/pgtable_64.c
> > @@ -9,13 +9,6 @@
> > #define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */
> > #define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */
> >
> > -#ifdef CONFIG_X86_5LEVEL
> > -/* __pgtable_l5_enabled needs to be in .data to avoid being cleared along with .bss */
> > -unsigned int __section(".data") __pgtable_l5_enabled;
> > -unsigned int __section(".data") pgdir_shift = 39;
> > -unsigned int __section(".data") ptrs_per_p4d = 1;
> > -#endif
> > -
> > /* Buffer to preserve trampoline memory */
> > static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
> >
> > @@ -125,11 +118,6 @@ asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable)
> > native_cpuid_eax(0) >= 7 &&
> > (native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
> > l5_required = true;
> > -
> > - /* Initialize variables for 5-level paging */
> > - __pgtable_l5_enabled = 1;
> > - pgdir_shift = 48;
> > - ptrs_per_p4d = 512;
> > }
> >
> > /*
> > diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
> > index 083ec6d7722a..06358bb067fe 100644
> > --- a/arch/x86/boot/compressed/vmlinux.lds.S
> > +++ b/arch/x86/boot/compressed/vmlinux.lds.S
> > @@ -81,6 +81,7 @@ SECTIONS
> > *(.dynamic) *(.dynsym) *(.dynstr) *(.dynbss)
> > *(.hash) *(.gnu.hash)
> > *(.note.*)
> > + *(.altinstructions .altinstr_replacement)
> > }
> >
> > .got.plt (INFO) : {
> > diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
> > index 38b54b992f32..6a57bfdff52b 100644
> > --- a/arch/x86/include/asm/pgtable_64_types.h
> > +++ b/arch/x86/include/asm/pgtable_64_types.h
> > @@ -6,7 +6,10 @@
> >
> > #ifndef __ASSEMBLY__
> > #include <linux/types.h>
> > +#include <asm/alternative.h>
> > +#include <asm/cpufeatures.h>
> > #include <asm/kaslr.h>
> > +#include <asm/processor-flags.h>
> >
> > /*
> > * These are used to make use of C type-checking..
> > @@ -21,63 +24,50 @@ typedef unsigned long pgprotval_t;
> > typedef struct { pteval_t pte; } pte_t;
> > typedef struct { pmdval_t pmd; } pmd_t;
> >
> > -#ifdef CONFIG_X86_5LEVEL
> > -extern unsigned int __pgtable_l5_enabled;
> > -
> > -#ifdef USE_EARLY_PGTABLE_L5
> > -/*
> > - * cpu_feature_enabled() is not available in early boot code.
> > - * Use variable instead.
> > - */
> > -static inline bool pgtable_l5_enabled(void)
> > +static __always_inline __pure bool pgtable_l5_enabled(void)
> > {
> > - return __pgtable_l5_enabled;
> > -}
> > -#else
> > -#define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57)
> > -#endif /* USE_EARLY_PGTABLE_L5 */
> > + unsigned long r;
> > + bool ret;
> >
> > -#else
> > -#define pgtable_l5_enabled() 0
> > -#endif /* CONFIG_X86_5LEVEL */
> > + if (!IS_ENABLED(CONFIG_X86_5LEVEL))
> > + return false;
> > +
> > + asm(ALTERNATIVE_TERNARY(
> > + "movq %%cr4, %[reg] \n\t btl %[la57], %k[reg]" CC_SET(c),
> > + %P[feat], "stc", "clc")
> > + : [reg] "=&r" (r), CC_OUT(c) (ret)
> > + : [feat] "i" (X86_FEATURE_LA57),
> > + [la57] "i" (X86_CR4_LA57_BIT)
> > + : "cc");
>
> This should be more like _static_cpu_has(), where the runtime test is
> out of line in a discardable section, and the inline part is just a
> JMP or NOP.
>
Why exactly? It matters very little in terms of space, a cross-section
jump is 5 bytes, and movq+btl is 7 bytes.
If you are referring to the use of the C flag: this way, it is left up
to the compiler to decide whether a branch or a conditional move is
more suitable, rather than forcing the use of a branch in one of the
two cases.
On Wed, Feb 14, 2024 at 10:45 AM Ard Biesheuvel <ardb@kernel.org> wrote:
>
> On Wed, 14 Feb 2024 at 16:24, Brian Gerst <brgerst@gmail.com> wrote:
> >
> > On Tue, Feb 13, 2024 at 7:42 AM Ard Biesheuvel <ardb+git@googlecom> wrote:
> > >
> > > From: Ard Biesheuvel <ardb@kernel.org>
> > >
> > > On x86_64, the core kernel is entered in long mode, which implies that
> > > paging is enabled. This means that the CR4.LA57 control bit is
> > > guaranteed to be in sync with the number of paging levels used by the
> > > kernel, and there is no need to store this in a variable.
> > >
> > > There is also no need to use variables for storing the calculations of
> > > pgdir_shift and ptrs_per_p4d, as they are easily determined on the fly.
> > >
> > > This removes the need for two different sources of truth for determining
> > > whether 5-level paging is in use: CR4.LA57 always reflects the actual
> > > state, and never changes from the point of view of the 64-bit core
> > > kernel. The only potential concern is the cost of CR4 accesses, which
> > > can be mitigated using alternatives patching based on feature detection.
> > >
> > > Note that even the decompressor does not manipulate any page tables
> > > before updating CR4.LA57, so it can also avoid the associated global
> > > variables entirely. However, as it does not implement alternatives
> > > patching, the associated ELF sections need to be discarded.
> > >
> > > Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> > > ---
> > > arch/x86/boot/compressed/misc.h | 4 --
> > > arch/x86/boot/compressed/pgtable_64.c | 12 ----
> > > arch/x86/boot/compressed/vmlinux.lds.S | 1 +
> > > arch/x86/include/asm/pgtable_64_types.h | 58 ++++++++------------
> > > arch/x86/kernel/cpu/common.c | 2 -
> > > arch/x86/kernel/head64.c | 33 +----------
> > > arch/x86/mm/kasan_init_64.c | 3 -
> > > arch/x86/mm/mem_encrypt_identity.c | 9 ---
> > > 8 files changed, 27 insertions(+), 95 deletions(-)
> > >
> > > diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
> > > index bc2f0f17fb90..2b15ddd0e177 100644
> > > --- a/arch/x86/boot/compressed/misc.h
> > > +++ b/arch/x86/boot/compressed/misc.h
> > > @@ -16,9 +16,6 @@
> > >
> > > #define __NO_FORTIFY
> > >
> > > -/* cpu_feature_enabled() cannot be used this early */
> > > -#define USE_EARLY_PGTABLE_L5
> > > -
> > > /*
> > > * Boot stub deals with identity mappings, physical and virtual addresses are
> > > * the same, so override these defines.
> > > @@ -178,7 +175,6 @@ static inline int count_immovable_mem_regions(void) { return 0; }
> > > #endif
> > >
> > > /* ident_map_64.c */
> > > -extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d;
> > > extern void kernel_add_identity_map(unsigned long start, unsigned long end);
> > >
> > > /* Used by PAGE_KERN* macros: */
> > > diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
> > > index 51f957b24ba7..ae72f53f5e77 100644
> > > --- a/arch/x86/boot/compressed/pgtable_64.c
> > > +++ b/arch/x86/boot/compressed/pgtable_64.c
> > > @@ -9,13 +9,6 @@
> > > #define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */
> > > #define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */
> > >
> > > -#ifdef CONFIG_X86_5LEVEL
> > > -/* __pgtable_l5_enabled needs to be in .data to avoid being cleared along with .bss */
> > > -unsigned int __section(".data") __pgtable_l5_enabled;
> > > -unsigned int __section(".data") pgdir_shift = 39;
> > > -unsigned int __section(".data") ptrs_per_p4d = 1;
> > > -#endif
> > > -
> > > /* Buffer to preserve trampoline memory */
> > > static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
> > >
> > > @@ -125,11 +118,6 @@ asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable)
> > > native_cpuid_eax(0) >= 7 &&
> > > (native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
> > > l5_required = true;
> > > -
> > > - /* Initialize variables for 5-level paging */
> > > - __pgtable_l5_enabled = 1;
> > > - pgdir_shift = 48;
> > > - ptrs_per_p4d = 512;
> > > }
> > >
> > > /*
> > > diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
> > > index 083ec6d7722a..06358bb067fe 100644
> > > --- a/arch/x86/boot/compressed/vmlinux.lds.S
> > > +++ b/arch/x86/boot/compressed/vmlinux.lds.S
> > > @@ -81,6 +81,7 @@ SECTIONS
> > > *(.dynamic) *(.dynsym) *(.dynstr) *(.dynbss)
> > > *(.hash) *(.gnu.hash)
> > > *(.note.*)
> > > + *(.altinstructions .altinstr_replacement)
> > > }
> > >
> > > .got.plt (INFO) : {
> > > diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
> > > index 38b54b992f32..6a57bfdff52b 100644
> > > --- a/arch/x86/include/asm/pgtable_64_types.h
> > > +++ b/arch/x86/include/asm/pgtable_64_types.h
> > > @@ -6,7 +6,10 @@
> > >
> > > #ifndef __ASSEMBLY__
> > > #include <linux/types.h>
> > > +#include <asm/alternative.h>
> > > +#include <asm/cpufeatures.h>
> > > #include <asm/kaslr.h>
> > > +#include <asm/processor-flags.h>
> > >
> > > /*
> > > * These are used to make use of C type-checking..
> > > @@ -21,63 +24,50 @@ typedef unsigned long pgprotval_t;
> > > typedef struct { pteval_t pte; } pte_t;
> > > typedef struct { pmdval_t pmd; } pmd_t;
> > >
> > > -#ifdef CONFIG_X86_5LEVEL
> > > -extern unsigned int __pgtable_l5_enabled;
> > > -
> > > -#ifdef USE_EARLY_PGTABLE_L5
> > > -/*
> > > - * cpu_feature_enabled() is not available in early boot code.
> > > - * Use variable instead.
> > > - */
> > > -static inline bool pgtable_l5_enabled(void)
> > > +static __always_inline __pure bool pgtable_l5_enabled(void)
> > > {
> > > - return __pgtable_l5_enabled;
> > > -}
> > > -#else
> > > -#define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57)
> > > -#endif /* USE_EARLY_PGTABLE_L5 */
> > > + unsigned long r;
> > > + bool ret;
> > >
> > > -#else
> > > -#define pgtable_l5_enabled() 0
> > > -#endif /* CONFIG_X86_5LEVEL */
> > > + if (!IS_ENABLED(CONFIG_X86_5LEVEL))
> > > + return false;
> > > +
> > > + asm(ALTERNATIVE_TERNARY(
> > > + "movq %%cr4, %[reg] \n\t btl %[la57], %k[reg]" CC_SET(c),
> > > + %P[feat], "stc", "clc")
> > > + : [reg] "=&r" (r), CC_OUT(c) (ret)
> > > + : [feat] "i" (X86_FEATURE_LA57),
> > > + [la57] "i" (X86_CR4_LA57_BIT)
> > > + : "cc");
> >
> > This should be more like _static_cpu_has(), where the runtime test is
> > out of line in a discardable section, and the inline part is just a
> > JMP or NOP.
> >
>
> Why exactly? It matters very little in terms of space, a cross-section
> jump is 5 bytes, and movq+btl is 7 bytes.
>
> If you are referring to the use of the C flag: this way, it is left up
> to the compiler to decide whether a branch or a conditional move is
> more suitable, rather than forcing the use of a branch in one of the
> two cases.
You're probably right in that many uses of pgtable_l5_enabled() are
choosing between two constants, and static_cpu_has() does not handle
that case very efficiently. Something like static_cpu_choose(feature,
yes_val, no_val) would be a possible idea to explore.
Brian Gerst
@@ -16,9 +16,6 @@
#define __NO_FORTIFY
-/* cpu_feature_enabled() cannot be used this early */
-#define USE_EARLY_PGTABLE_L5
-
/*
* Boot stub deals with identity mappings, physical and virtual addresses are
* the same, so override these defines.
@@ -178,7 +175,6 @@ static inline int count_immovable_mem_regions(void) { return 0; }
#endif
/* ident_map_64.c */
-extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d;
extern void kernel_add_identity_map(unsigned long start, unsigned long end);
/* Used by PAGE_KERN* macros: */
@@ -9,13 +9,6 @@
#define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */
#define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */
-#ifdef CONFIG_X86_5LEVEL
-/* __pgtable_l5_enabled needs to be in .data to avoid being cleared along with .bss */
-unsigned int __section(".data") __pgtable_l5_enabled;
-unsigned int __section(".data") pgdir_shift = 39;
-unsigned int __section(".data") ptrs_per_p4d = 1;
-#endif
-
/* Buffer to preserve trampoline memory */
static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
@@ -125,11 +118,6 @@ asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable)
native_cpuid_eax(0) >= 7 &&
(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
l5_required = true;
-
- /* Initialize variables for 5-level paging */
- __pgtable_l5_enabled = 1;
- pgdir_shift = 48;
- ptrs_per_p4d = 512;
}
/*
@@ -81,6 +81,7 @@ SECTIONS
*(.dynamic) *(.dynsym) *(.dynstr) *(.dynbss)
*(.hash) *(.gnu.hash)
*(.note.*)
+ *(.altinstructions .altinstr_replacement)
}
.got.plt (INFO) : {
@@ -6,7 +6,10 @@
#ifndef __ASSEMBLY__
#include <linux/types.h>
+#include <asm/alternative.h>
+#include <asm/cpufeatures.h>
#include <asm/kaslr.h>
+#include <asm/processor-flags.h>
/*
* These are used to make use of C type-checking..
@@ -21,63 +24,50 @@ typedef unsigned long pgprotval_t;
typedef struct { pteval_t pte; } pte_t;
typedef struct { pmdval_t pmd; } pmd_t;
-#ifdef CONFIG_X86_5LEVEL
-extern unsigned int __pgtable_l5_enabled;
-
-#ifdef USE_EARLY_PGTABLE_L5
-/*
- * cpu_feature_enabled() is not available in early boot code.
- * Use variable instead.
- */
-static inline bool pgtable_l5_enabled(void)
+static __always_inline __pure bool pgtable_l5_enabled(void)
{
- return __pgtable_l5_enabled;
-}
-#else
-#define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57)
-#endif /* USE_EARLY_PGTABLE_L5 */
+ unsigned long r;
+ bool ret;
-#else
-#define pgtable_l5_enabled() 0
-#endif /* CONFIG_X86_5LEVEL */
+ if (!IS_ENABLED(CONFIG_X86_5LEVEL))
+ return false;
+
+ asm(ALTERNATIVE_TERNARY(
+ "movq %%cr4, %[reg] \n\t btl %[la57], %k[reg]" CC_SET(c),
+ %P[feat], "stc", "clc")
+ : [reg] "=&r" (r), CC_OUT(c) (ret)
+ : [feat] "i" (X86_FEATURE_LA57),
+ [la57] "i" (X86_CR4_LA57_BIT)
+ : "cc");
-extern unsigned int pgdir_shift;
-extern unsigned int ptrs_per_p4d;
+ return ret;
+}
#endif /* !__ASSEMBLY__ */
#define SHARED_KERNEL_PMD 0
-#ifdef CONFIG_X86_5LEVEL
-
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
*/
-#define PGDIR_SHIFT pgdir_shift
+#define PGDIR_SHIFT (pgtable_l5_enabled() ? 48 : 39)
#define PTRS_PER_PGD 512
/*
* 4th level page in 5-level paging case
*/
#define P4D_SHIFT 39
+#ifdef CONFIG_X86_5LEVEL
#define MAX_PTRS_PER_P4D 512
-#define PTRS_PER_P4D ptrs_per_p4d
+#else
+#define MAX_PTRS_PER_P4D 1
+#endif
+#define PTRS_PER_P4D (pgtable_l5_enabled() ? 512 : 1)
#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
#define P4D_MASK (~(P4D_SIZE - 1))
#define MAX_POSSIBLE_PHYSMEM_BITS 52
-#else /* CONFIG_X86_5LEVEL */
-
-/*
- * PGDIR_SHIFT determines what a top-level page table entry can map
- */
-#define PGDIR_SHIFT 39
-#define PTRS_PER_PGD 512
-#define MAX_PTRS_PER_P4D 1
-
-#endif /* CONFIG_X86_5LEVEL */
-
/*
* 3rd level page
*/
@@ -1,6 +1,4 @@
// SPDX-License-Identifier: GPL-2.0-only
-/* cpu_feature_enabled() cannot be used this early */
-#define USE_EARLY_PGTABLE_L5
#include <linux/memblock.h>
#include <linux/linkage.h>
@@ -7,9 +7,6 @@
#define DISABLE_BRANCH_PROFILING
-/* cpu_feature_enabled() cannot be used this early */
-#define USE_EARLY_PGTABLE_L5
-
#include <linux/init.h>
#include <linux/linkage.h>
#include <linux/types.h>
@@ -52,14 +49,6 @@ extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
static unsigned int __initdata next_early_pgt;
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
-#ifdef CONFIG_X86_5LEVEL
-unsigned int __pgtable_l5_enabled __ro_after_init;
-unsigned int pgdir_shift __ro_after_init = 39;
-EXPORT_SYMBOL(pgdir_shift);
-unsigned int ptrs_per_p4d __ro_after_init = 1;
-EXPORT_SYMBOL(ptrs_per_p4d);
-#endif
-
#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE_L4;
EXPORT_SYMBOL(page_offset_base);
@@ -78,21 +67,6 @@ static struct desc_struct startup_gdt[GDT_ENTRIES] __initdata = {
[GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(DESC_DATA64, 0, 0xfffff),
};
-static inline bool check_la57_support(void)
-{
- if (!IS_ENABLED(CONFIG_X86_5LEVEL))
- return false;
-
- /*
- * 5-level paging is detected and enabled at kernel decompression
- * stage. Only check if it has been enabled there.
- */
- if (!(native_read_cr4() & X86_CR4_LA57))
- return false;
-
- return true;
-}
-
static unsigned long __head sme_postprocess_startup(struct boot_params *bp, pmdval_t *pmd)
{
unsigned long vaddr, vaddr_end;
@@ -155,7 +129,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
bool la57;
int i;
- la57 = check_la57_support();
+ la57 = pgtable_l5_enabled();
/* Is the address too large? */
if (physaddr >> MAX_PHYSMEM_BITS)
@@ -438,10 +412,7 @@ asmlinkage __visible void __init __noreturn x86_64_start_kernel(char * real_mode
(__START_KERNEL & PGDIR_MASK)));
BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
- if (check_la57_support()) {
- __pgtable_l5_enabled = 1;
- pgdir_shift = 48;
- ptrs_per_p4d = 512;
+ if (pgtable_l5_enabled()) {
page_offset_base = __PAGE_OFFSET_BASE_L5;
vmalloc_base = __VMALLOC_BASE_L5;
vmemmap_base = __VMEMMAP_BASE_L5;
@@ -2,9 +2,6 @@
#define DISABLE_BRANCH_PROFILING
#define pr_fmt(fmt) "kasan: " fmt
-/* cpu_feature_enabled() cannot be used this early */
-#define USE_EARLY_PGTABLE_L5
-
#include <linux/memblock.h>
#include <linux/kasan.h>
#include <linux/kdebug.h>
@@ -27,15 +27,6 @@
#undef CONFIG_PARAVIRT_XXL
#undef CONFIG_PARAVIRT_SPINLOCKS
-/*
- * This code runs before CPU feature bits are set. By default, the
- * pgtable_l5_enabled() function uses bit X86_FEATURE_LA57 to determine if
- * 5-level paging is active, so that won't work here. USE_EARLY_PGTABLE_L5
- * is provided to handle this situation and, instead, use a variable that
- * has been set by the early boot code.
- */
-#define USE_EARLY_PGTABLE_L5
-
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/mem_encrypt.h>