riscv: Sync efi page table's kernel mappings before switching

Message ID 20221121133303.1782246-1-alexghiti@rivosinc.com
State New
Headers
Series riscv: Sync efi page table's kernel mappings before switching

Commit Message

Alexandre Ghiti Nov. 21, 2022, 1:33 p.m. UTC
The EFI page table is initially created as a copy of the kernel page table.
With VMAP_STACK enabled, kernel stacks are allocated in the vmalloc area:
if the stack is allocated in a new PGD (one that was not present at the
moment of the efi page table creation or not synced in a previous vmalloc
fault), the kernel will take a trap when switching to the efi page table
when the vmalloc kernel stack is accessed, resulting in a kernel panic.

Fix that by updating the efi kernel mappings before switching to the efi
page table.

Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
---
 arch/riscv/include/asm/efi.h     |  6 +++++-
 arch/riscv/include/asm/pgalloc.h | 11 ++++++++---
 2 files changed, 13 insertions(+), 4 deletions(-)
  

Comments

Conor Dooley Nov. 22, 2022, 8:47 a.m. UTC | #1
On Mon, Nov 21, 2022 at 02:33:03PM +0100, Alexandre Ghiti wrote:
> The EFI page table is initially created as a copy of the kernel page table.
> With VMAP_STACK enabled, kernel stacks are allocated in the vmalloc area:
> if the stack is allocated in a new PGD (one that was not present at the
> moment of the efi page table creation or not synced in a previous vmalloc
> fault), the kernel will take a trap when switching to the efi page table
> when the vmalloc kernel stack is accessed, resulting in a kernel panic.
> 
> Fix that by updating the efi kernel mappings before switching to the efi
> page table.
> 
> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>

Hey Alex,
What commit does this fix?

> ---
>  arch/riscv/include/asm/efi.h     |  6 +++++-
>  arch/riscv/include/asm/pgalloc.h | 11 ++++++++---
>  2 files changed, 13 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h
> index f74879a8f1ea..e229d7be4b66 100644
> --- a/arch/riscv/include/asm/efi.h
> +++ b/arch/riscv/include/asm/efi.h
> @@ -10,6 +10,7 @@
>  #include <asm/mmu_context.h>
>  #include <asm/ptrace.h>
>  #include <asm/tlbflush.h>
> +#include <asm/pgalloc.h>
>  
>  #ifdef CONFIG_EFI
>  extern void efi_init(void);
> @@ -20,7 +21,10 @@ extern void efi_init(void);
>  int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
>  int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
>  
> -#define arch_efi_call_virt_setup()      efi_virtmap_load()
> +#define arch_efi_call_virt_setup()      ({		\
> +		sync_kernel_mappings(efi_mm.pgd);	\
> +		efi_virtmap_load();			\
> +	})
>  #define arch_efi_call_virt_teardown()   efi_virtmap_unload()
>  
>  #define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE)
> diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
> index 947f23d7b6af..59dc12b5b7e8 100644
> --- a/arch/riscv/include/asm/pgalloc.h
> +++ b/arch/riscv/include/asm/pgalloc.h
> @@ -127,6 +127,13 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
>  #define __p4d_free_tlb(tlb, p4d, addr)  p4d_free((tlb)->mm, p4d)
>  #endif /* __PAGETABLE_PMD_FOLDED */
>  
> +static inline void sync_kernel_mappings(pgd_t *pgd)
> +{
> +	memcpy(pgd + USER_PTRS_PER_PGD,
> +	       init_mm.pgd + USER_PTRS_PER_PGD,
> +	       (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
> +}
> +
>  static inline pgd_t *pgd_alloc(struct mm_struct *mm)
>  {
>  	pgd_t *pgd;
> @@ -135,9 +142,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
>  	if (likely(pgd != NULL)) {
>  		memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
>  		/* Copy kernel mappings */
> -		memcpy(pgd + USER_PTRS_PER_PGD,
> -			init_mm.pgd + USER_PTRS_PER_PGD,
> -			(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
> +		sync_kernel_mappings(pgd);
>  	}
>  	return pgd;
>  }
> -- 
> 2.37.2
>
  
Alexandre Ghiti Nov. 22, 2022, 8:54 a.m. UTC | #2
Hi Conor,

On Tue, Nov 22, 2022 at 9:48 AM Conor Dooley <conor.dooley@microchip.com> wrote:
>
> On Mon, Nov 21, 2022 at 02:33:03PM +0100, Alexandre Ghiti wrote:
> > The EFI page table is initially created as a copy of the kernel page table.
> > With VMAP_STACK enabled, kernel stacks are allocated in the vmalloc area:
> > if the stack is allocated in a new PGD (one that was not present at the
> > moment of the efi page table creation or not synced in a previous vmalloc
> > fault), the kernel will take a trap when switching to the efi page table
> > when the vmalloc kernel stack is accessed, resulting in a kernel panic.
> >
> > Fix that by updating the efi kernel mappings before switching to the efi
> > page table.
> >
> > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
>
> Hey Alex,
> What commit does this fix?

You're right, I should have added this and +cc Atish:

Fixes: b91540d52a08 ("RISC-V: Add EFI runtime services")

Thanks,

Alex

>
>
> > ---
> >  arch/riscv/include/asm/efi.h     |  6 +++++-
> >  arch/riscv/include/asm/pgalloc.h | 11 ++++++++---
> >  2 files changed, 13 insertions(+), 4 deletions(-)
> >
> > diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h
> > index f74879a8f1ea..e229d7be4b66 100644
> > --- a/arch/riscv/include/asm/efi.h
> > +++ b/arch/riscv/include/asm/efi.h
> > @@ -10,6 +10,7 @@
> >  #include <asm/mmu_context.h>
> >  #include <asm/ptrace.h>
> >  #include <asm/tlbflush.h>
> > +#include <asm/pgalloc.h>
> >
> >  #ifdef CONFIG_EFI
> >  extern void efi_init(void);
> > @@ -20,7 +21,10 @@ extern void efi_init(void);
> >  int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
> >  int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
> >
> > -#define arch_efi_call_virt_setup()      efi_virtmap_load()
> > +#define arch_efi_call_virt_setup()      ({           \
> > +             sync_kernel_mappings(efi_mm.pgd);       \
> > +             efi_virtmap_load();                     \
> > +     })
> >  #define arch_efi_call_virt_teardown()   efi_virtmap_unload()
> >
> >  #define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE)
> > diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
> > index 947f23d7b6af..59dc12b5b7e8 100644
> > --- a/arch/riscv/include/asm/pgalloc.h
> > +++ b/arch/riscv/include/asm/pgalloc.h
> > @@ -127,6 +127,13 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
> >  #define __p4d_free_tlb(tlb, p4d, addr)  p4d_free((tlb)->mm, p4d)
> >  #endif /* __PAGETABLE_PMD_FOLDED */
> >
> > +static inline void sync_kernel_mappings(pgd_t *pgd)
> > +{
> > +     memcpy(pgd + USER_PTRS_PER_PGD,
> > +            init_mm.pgd + USER_PTRS_PER_PGD,
> > +            (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
> > +}
> > +
> >  static inline pgd_t *pgd_alloc(struct mm_struct *mm)
> >  {
> >       pgd_t *pgd;
> > @@ -135,9 +142,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
> >       if (likely(pgd != NULL)) {
> >               memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
> >               /* Copy kernel mappings */
> > -             memcpy(pgd + USER_PTRS_PER_PGD,
> > -                     init_mm.pgd + USER_PTRS_PER_PGD,
> > -                     (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
> > +             sync_kernel_mappings(pgd);
> >       }
> >       return pgd;
> >  }
> > --
> > 2.37.2
> >
  
Emil Renner Berthing Nov. 23, 2022, 5:19 p.m. UTC | #3
Hi Alexandre,

On Mon, 21 Nov 2022 at 14:33, Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>
> The EFI page table is initially created as a copy of the kernel page table.
> With VMAP_STACK enabled, kernel stacks are allocated in the vmalloc area:
> if the stack is allocated in a new PGD (one that was not present at the
> moment of the efi page table creation or not synced in a previous vmalloc
> fault), the kernel will take a trap when switching to the efi page table
> when the vmalloc kernel stack is accessed, resulting in a kernel panic.
>
> Fix that by updating the efi kernel mappings before switching to the efi
> page table.
>
> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>

Thanks for the patch! With this applied on the Ubuntu 5.19 kernel I
can enable CONFIG_VMAP_STACK and cat /sys/firmware/efi/efivars/* on
the Unmatched without locking up. So

Tested-by: Emil Renner Berthing <emil.renner.berthing@canonical.com>

> ---
>  arch/riscv/include/asm/efi.h     |  6 +++++-
>  arch/riscv/include/asm/pgalloc.h | 11 ++++++++---
>  2 files changed, 13 insertions(+), 4 deletions(-)
>
> diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h
> index f74879a8f1ea..e229d7be4b66 100644
> --- a/arch/riscv/include/asm/efi.h
> +++ b/arch/riscv/include/asm/efi.h
> @@ -10,6 +10,7 @@
>  #include <asm/mmu_context.h>
>  #include <asm/ptrace.h>
>  #include <asm/tlbflush.h>
> +#include <asm/pgalloc.h>
>
>  #ifdef CONFIG_EFI
>  extern void efi_init(void);
> @@ -20,7 +21,10 @@ extern void efi_init(void);
>  int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
>  int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
>
> -#define arch_efi_call_virt_setup()      efi_virtmap_load()
> +#define arch_efi_call_virt_setup()      ({             \
> +               sync_kernel_mappings(efi_mm.pgd);       \
> +               efi_virtmap_load();                     \
> +       })
>  #define arch_efi_call_virt_teardown()   efi_virtmap_unload()
>
>  #define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE)
> diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
> index 947f23d7b6af..59dc12b5b7e8 100644
> --- a/arch/riscv/include/asm/pgalloc.h
> +++ b/arch/riscv/include/asm/pgalloc.h
> @@ -127,6 +127,13 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
>  #define __p4d_free_tlb(tlb, p4d, addr)  p4d_free((tlb)->mm, p4d)
>  #endif /* __PAGETABLE_PMD_FOLDED */
>
> +static inline void sync_kernel_mappings(pgd_t *pgd)
> +{
> +       memcpy(pgd + USER_PTRS_PER_PGD,
> +              init_mm.pgd + USER_PTRS_PER_PGD,
> +              (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
> +}
> +
>  static inline pgd_t *pgd_alloc(struct mm_struct *mm)
>  {
>         pgd_t *pgd;
> @@ -135,9 +142,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
>         if (likely(pgd != NULL)) {
>                 memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
>                 /* Copy kernel mappings */
> -               memcpy(pgd + USER_PTRS_PER_PGD,
> -                       init_mm.pgd + USER_PTRS_PER_PGD,
> -                       (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
> +               sync_kernel_mappings(pgd);
>         }
>         return pgd;
>  }
> --
> 2.37.2
>
  
Atish Patra Nov. 23, 2022, 7:12 p.m. UTC | #4
On Wed, Nov 23, 2022 at 9:19 AM Emil Renner Berthing
<emil.renner.berthing@canonical.com> wrote:
>
> Hi Alexandre,
>
> On Mon, 21 Nov 2022 at 14:33, Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> >
> > The EFI page table is initially created as a copy of the kernel page table.
> > With VMAP_STACK enabled, kernel stacks are allocated in the vmalloc area:
> > if the stack is allocated in a new PGD (one that was not present at the
> > moment of the efi page table creation or not synced in a previous vmalloc
> > fault), the kernel will take a trap when switching to the efi page table
> > when the vmalloc kernel stack is accessed, resulting in a kernel panic.
> >
> > Fix that by updating the efi kernel mappings before switching to the efi
> > page table.
> >
> > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
>
> Thanks for the patch! With this applied on the Ubuntu 5.19 kernel I
> can enable CONFIG_VMAP_STACK and cat /sys/firmware/efi/efivars/* on
> the Unmatched without locking up. So
>
> Tested-by: Emil Renner Berthing <emil.renner.berthing@canonical.com>
>
> > ---
> >  arch/riscv/include/asm/efi.h     |  6 +++++-
> >  arch/riscv/include/asm/pgalloc.h | 11 ++++++++---
> >  2 files changed, 13 insertions(+), 4 deletions(-)
> >
> > diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h
> > index f74879a8f1ea..e229d7be4b66 100644
> > --- a/arch/riscv/include/asm/efi.h
> > +++ b/arch/riscv/include/asm/efi.h
> > @@ -10,6 +10,7 @@
> >  #include <asm/mmu_context.h>
> >  #include <asm/ptrace.h>
> >  #include <asm/tlbflush.h>
> > +#include <asm/pgalloc.h>
> >
> >  #ifdef CONFIG_EFI
> >  extern void efi_init(void);
> > @@ -20,7 +21,10 @@ extern void efi_init(void);
> >  int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
> >  int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
> >
> > -#define arch_efi_call_virt_setup()      efi_virtmap_load()
> > +#define arch_efi_call_virt_setup()      ({             \
> > +               sync_kernel_mappings(efi_mm.pgd);       \
> > +               efi_virtmap_load();                     \
> > +       })
> >  #define arch_efi_call_virt_teardown()   efi_virtmap_unload()
> >
> >  #define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE)
> > diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
> > index 947f23d7b6af..59dc12b5b7e8 100644
> > --- a/arch/riscv/include/asm/pgalloc.h
> > +++ b/arch/riscv/include/asm/pgalloc.h
> > @@ -127,6 +127,13 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
> >  #define __p4d_free_tlb(tlb, p4d, addr)  p4d_free((tlb)->mm, p4d)
> >  #endif /* __PAGETABLE_PMD_FOLDED */
> >
> > +static inline void sync_kernel_mappings(pgd_t *pgd)
> > +{
> > +       memcpy(pgd + USER_PTRS_PER_PGD,
> > +              init_mm.pgd + USER_PTRS_PER_PGD,
> > +              (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
> > +}
> > +
> >  static inline pgd_t *pgd_alloc(struct mm_struct *mm)
> >  {
> >         pgd_t *pgd;
> > @@ -135,9 +142,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
> >         if (likely(pgd != NULL)) {
> >                 memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
> >                 /* Copy kernel mappings */
> > -               memcpy(pgd + USER_PTRS_PER_PGD,
> > -                       init_mm.pgd + USER_PTRS_PER_PGD,
> > -                       (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
> > +               sync_kernel_mappings(pgd);
> >         }
> >         return pgd;
> >  }
> > --
> > 2.37.2
> >

Thanks for the fix. With the correct Fixes tag in the commit description:

Reviewed-by: Atish Patra <atishp@rivosinc.com>
  
Palmer Dabbelt Nov. 29, 2022, 12:37 a.m. UTC | #5
On Mon, 21 Nov 2022 14:33:03 +0100, Alexandre Ghiti wrote:
> The EFI page table is initially created as a copy of the kernel page table.
> With VMAP_STACK enabled, kernel stacks are allocated in the vmalloc area:
> if the stack is allocated in a new PGD (one that was not present at the
> moment of the efi page table creation or not synced in a previous vmalloc
> fault), the kernel will take a trap when switching to the efi page table
> when the vmalloc kernel stack is accessed, resulting in a kernel panic.
> 
> [...]

Applied, thanks!

[1/1] riscv: Sync efi page table's kernel mappings before switching
      https://git.kernel.org/palmer/c/3f105a742725

Best regards,
  
patchwork-bot+linux-riscv@kernel.org Nov. 29, 2022, 12:50 a.m. UTC | #6
Hello:

This patch was applied to riscv/linux.git (fixes)
by Palmer Dabbelt <palmer@rivosinc.com>:

On Mon, 21 Nov 2022 14:33:03 +0100 you wrote:
> The EFI page table is initially created as a copy of the kernel page table.
> With VMAP_STACK enabled, kernel stacks are allocated in the vmalloc area:
> if the stack is allocated in a new PGD (one that was not present at the
> moment of the efi page table creation or not synced in a previous vmalloc
> fault), the kernel will take a trap when switching to the efi page table
> when the vmalloc kernel stack is accessed, resulting in a kernel panic.
> 
> [...]

Here is the summary with links:
  - riscv: Sync efi page table's kernel mappings before switching
    https://git.kernel.org/riscv/c/3f105a742725

You are awesome, thank you!
  

Patch

diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h
index f74879a8f1ea..e229d7be4b66 100644
--- a/arch/riscv/include/asm/efi.h
+++ b/arch/riscv/include/asm/efi.h
@@ -10,6 +10,7 @@ 
 #include <asm/mmu_context.h>
 #include <asm/ptrace.h>
 #include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
 
 #ifdef CONFIG_EFI
 extern void efi_init(void);
@@ -20,7 +21,10 @@  extern void efi_init(void);
 int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
 int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
 
-#define arch_efi_call_virt_setup()      efi_virtmap_load()
+#define arch_efi_call_virt_setup()      ({		\
+		sync_kernel_mappings(efi_mm.pgd);	\
+		efi_virtmap_load();			\
+	})
 #define arch_efi_call_virt_teardown()   efi_virtmap_unload()
 
 #define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE)
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index 947f23d7b6af..59dc12b5b7e8 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -127,6 +127,13 @@  static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
 #define __p4d_free_tlb(tlb, p4d, addr)  p4d_free((tlb)->mm, p4d)
 #endif /* __PAGETABLE_PMD_FOLDED */
 
+static inline void sync_kernel_mappings(pgd_t *pgd)
+{
+	memcpy(pgd + USER_PTRS_PER_PGD,
+	       init_mm.pgd + USER_PTRS_PER_PGD,
+	       (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+}
+
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *pgd;
@@ -135,9 +142,7 @@  static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 	if (likely(pgd != NULL)) {
 		memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
 		/* Copy kernel mappings */
-		memcpy(pgd + USER_PTRS_PER_PGD,
-			init_mm.pgd + USER_PTRS_PER_PGD,
-			(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+		sync_kernel_mappings(pgd);
 	}
 	return pgd;
 }