[v2,-next,1/4] RISC-V: ACPI: Enhance acpi_os_ioremap with MMIO remapping

Message ID 20230927170015.295232-2-sunilvl@ventanamicro.com
State New
Headers
Series RISC-V: ACPI improvements |

Commit Message

Sunil V L Sept. 27, 2023, 5 p.m. UTC
  Enhance the acpi_os_ioremap() to support opregions in MMIO space. Also,
have strict checks using EFI memory map to allow remapping the RAM similar
to arm64.

Signed-off-by: Sunil V L <sunilvl@ventanamicro.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
---
 arch/riscv/Kconfig       |  1 +
 arch/riscv/kernel/acpi.c | 87 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 86 insertions(+), 2 deletions(-)
  

Comments

Conor Dooley Oct. 2, 2023, 3:53 p.m. UTC | #1
On Wed, Sep 27, 2023 at 10:30:12PM +0530, Sunil V L wrote:
> Enhance the acpi_os_ioremap() to support opregions in MMIO space. Also,
> have strict checks using EFI memory map to allow remapping the RAM similar
> to arm64.
> 
> Signed-off-by: Sunil V L <sunilvl@ventanamicro.com>
> Reviewed-by: Andrew Jones <ajones@ventanamicro.com>

Thanks for resending this. If you respin, I think it would be worth
mentioning here that you are aligning things with arm64.

Acked-by: Conor Dooley <conor.dooley@microchip.com>

Cheer,
Conor.

> ---
>  arch/riscv/Kconfig       |  1 +
>  arch/riscv/kernel/acpi.c | 87 +++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 86 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index d607ab0f7c6d..ac039cf8af7a 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -39,6 +39,7 @@ config RISCV
>  	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
>  	select ARCH_HAS_UBSAN_SANITIZE_ALL
>  	select ARCH_HAS_VDSO_DATA
> +	select ARCH_KEEP_MEMBLOCK
>  	select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
>  	select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT
>  	select ARCH_STACKWALK
> diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c
> index 56cb2c986c48..e619edc8b0cc 100644
> --- a/arch/riscv/kernel/acpi.c
> +++ b/arch/riscv/kernel/acpi.c
> @@ -14,9 +14,10 @@
>   */
>  
>  #include <linux/acpi.h>
> +#include <linux/efi.h>
>  #include <linux/io.h>
> +#include <linux/memblock.h>
>  #include <linux/pci.h>
> -#include <linux/efi.h>
>  
>  int acpi_noirq = 1;		/* skip ACPI IRQ initialization */
>  int acpi_disabled = 1;
> @@ -217,7 +218,89 @@ void __init __acpi_unmap_table(void __iomem *map, unsigned long size)
>  
>  void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
>  {
> -	return (void __iomem *)memremap(phys, size, MEMREMAP_WB);
> +	efi_memory_desc_t *md, *region = NULL;
> +	pgprot_t prot;
> +
> +	if (WARN_ON_ONCE(!efi_enabled(EFI_MEMMAP)))
> +		return NULL;
> +
> +	for_each_efi_memory_desc(md) {
> +		u64 end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
> +
> +		if (phys < md->phys_addr || phys >= end)
> +			continue;
> +
> +		if (phys + size > end) {
> +			pr_warn(FW_BUG "requested region covers multiple EFI memory regions\n");
> +			return NULL;
> +		}
> +		region = md;
> +		break;
> +	}
> +
> +	/*
> +	 * It is fine for AML to remap regions that are not represented in the
> +	 * EFI memory map at all, as it only describes normal memory, and MMIO
> +	 * regions that require a virtual mapping to make them accessible to
> +	 * the EFI runtime services.
> +	 */
> +	prot = PAGE_KERNEL_IO;
> +	if (region) {
> +		switch (region->type) {
> +		case EFI_LOADER_CODE:
> +		case EFI_LOADER_DATA:
> +		case EFI_BOOT_SERVICES_CODE:
> +		case EFI_BOOT_SERVICES_DATA:
> +		case EFI_CONVENTIONAL_MEMORY:
> +		case EFI_PERSISTENT_MEMORY:
> +			if (memblock_is_map_memory(phys) ||
> +			    !memblock_is_region_memory(phys, size)) {
> +				pr_warn(FW_BUG "requested region covers kernel memory\n");
> +				return NULL;
> +			}
> +
> +			/*
> +			 * Mapping kernel memory is permitted if the region in
> +			 * question is covered by a single memblock with the
> +			 * NOMAP attribute set: this enables the use of ACPI
> +			 * table overrides passed via initramfs.
> +			 * This particular use case only requires read access.
> +			 */
> +			fallthrough;
> +
> +		case EFI_RUNTIME_SERVICES_CODE:
> +			/*
> +			 * This would be unusual, but not problematic per se,
> +			 * as long as we take care not to create a writable
> +			 * mapping for executable code.
> +			 */
> +			prot = PAGE_KERNEL_RO;
> +			break;
> +
> +		case EFI_ACPI_RECLAIM_MEMORY:
> +			/*
> +			 * ACPI reclaim memory is used to pass firmware tables
> +			 * and other data that is intended for consumption by
> +			 * the OS only, which may decide it wants to reclaim
> +			 * that memory and use it for something else. We never
> +			 * do that, but we usually add it to the linear map
> +			 * anyway, in which case we should use the existing
> +			 * mapping.
> +			 */
> +			if (memblock_is_map_memory(phys))
> +				return (void __iomem *)__va(phys);
> +			fallthrough;
> +
> +		default:
> +			if (region->attribute & EFI_MEMORY_WB)
> +				prot = PAGE_KERNEL;
> +			else if ((region->attribute & EFI_MEMORY_WC) ||
> +				 (region->attribute & EFI_MEMORY_WT))
> +				prot = pgprot_writecombine(PAGE_KERNEL);
> +		}
> +	}
> +
> +	return ioremap_prot(phys, size, pgprot_val(prot));
>  }
>  
>  #ifdef CONFIG_PCI
> -- 
> 2.39.2
>
  
Alexandre Ghiti Oct. 3, 2023, 6:53 p.m. UTC | #2
Hi Sunil,

On Wed, Sep 27, 2023 at 7:00 PM Sunil V L <sunilvl@ventanamicro.com> wrote:
>
> Enhance the acpi_os_ioremap() to support opregions in MMIO space. Also,
> have strict checks using EFI memory map to allow remapping the RAM similar
> to arm64.
>
> Signed-off-by: Sunil V L <sunilvl@ventanamicro.com>
> Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> ---
>  arch/riscv/Kconfig       |  1 +
>  arch/riscv/kernel/acpi.c | 87 +++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 86 insertions(+), 2 deletions(-)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index d607ab0f7c6d..ac039cf8af7a 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -39,6 +39,7 @@ config RISCV
>         select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
>         select ARCH_HAS_UBSAN_SANITIZE_ALL
>         select ARCH_HAS_VDSO_DATA
> +       select ARCH_KEEP_MEMBLOCK

Shouldn't we restrict this to ACPI?

>         select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
>         select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT
>         select ARCH_STACKWALK
> diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c
> index 56cb2c986c48..e619edc8b0cc 100644
> --- a/arch/riscv/kernel/acpi.c
> +++ b/arch/riscv/kernel/acpi.c
> @@ -14,9 +14,10 @@
>   */
>
>  #include <linux/acpi.h>
> +#include <linux/efi.h>
>  #include <linux/io.h>
> +#include <linux/memblock.h>
>  #include <linux/pci.h>
> -#include <linux/efi.h>
>
>  int acpi_noirq = 1;            /* skip ACPI IRQ initialization */
>  int acpi_disabled = 1;
> @@ -217,7 +218,89 @@ void __init __acpi_unmap_table(void __iomem *map, unsigned long size)
>
>  void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
>  {
> -       return (void __iomem *)memremap(phys, size, MEMREMAP_WB);
> +       efi_memory_desc_t *md, *region = NULL;
> +       pgprot_t prot;
> +
> +       if (WARN_ON_ONCE(!efi_enabled(EFI_MEMMAP)))
> +               return NULL;
> +
> +       for_each_efi_memory_desc(md) {
> +               u64 end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
> +
> +               if (phys < md->phys_addr || phys >= end)
> +                       continue;
> +
> +               if (phys + size > end) {
> +                       pr_warn(FW_BUG "requested region covers multiple EFI memory regions\n");
> +                       return NULL;
> +               }
> +               region = md;
> +               break;
> +       }
> +
> +       /*
> +        * It is fine for AML to remap regions that are not represented in the
> +        * EFI memory map at all, as it only describes normal memory, and MMIO
> +        * regions that require a virtual mapping to make them accessible to
> +        * the EFI runtime services.
> +        */
> +       prot = PAGE_KERNEL_IO;
> +       if (region) {
> +               switch (region->type) {
> +               case EFI_LOADER_CODE:
> +               case EFI_LOADER_DATA:
> +               case EFI_BOOT_SERVICES_CODE:
> +               case EFI_BOOT_SERVICES_DATA:
> +               case EFI_CONVENTIONAL_MEMORY:
> +               case EFI_PERSISTENT_MEMORY:
> +                       if (memblock_is_map_memory(phys) ||
> +                           !memblock_is_region_memory(phys, size)) {
> +                               pr_warn(FW_BUG "requested region covers kernel memory\n");
> +                               return NULL;
> +                       }
> +
> +                       /*
> +                        * Mapping kernel memory is permitted if the region in
> +                        * question is covered by a single memblock with the
> +                        * NOMAP attribute set: this enables the use of ACPI
> +                        * table overrides passed via initramfs.
> +                        * This particular use case only requires read access.
> +                        */
> +                       fallthrough;
> +
> +               case EFI_RUNTIME_SERVICES_CODE:
> +                       /*
> +                        * This would be unusual, but not problematic per se,
> +                        * as long as we take care not to create a writable
> +                        * mapping for executable code.
> +                        */
> +                       prot = PAGE_KERNEL_RO;
> +                       break;
> +
> +               case EFI_ACPI_RECLAIM_MEMORY:
> +                       /*
> +                        * ACPI reclaim memory is used to pass firmware tables
> +                        * and other data that is intended for consumption by
> +                        * the OS only, which may decide it wants to reclaim
> +                        * that memory and use it for something else. We never
> +                        * do that, but we usually add it to the linear map
> +                        * anyway, in which case we should use the existing
> +                        * mapping.
> +                        */
> +                       if (memblock_is_map_memory(phys))
> +                               return (void __iomem *)__va(phys);
> +                       fallthrough;
> +
> +               default:
> +                       if (region->attribute & EFI_MEMORY_WB)
> +                               prot = PAGE_KERNEL;
> +                       else if ((region->attribute & EFI_MEMORY_WC) ||
> +                                (region->attribute & EFI_MEMORY_WT))
> +                               prot = pgprot_writecombine(PAGE_KERNEL);

I have to ask: why is write-through mapped to write-combined here?

> +               }
> +       }
> +
> +       return ioremap_prot(phys, size, pgprot_val(prot));
>  }
>
>  #ifdef CONFIG_PCI
> --
> 2.39.2
>

Like Andrew said in v1, too bad we can't merge that with arm64 instead
of duplicating.

But otherwise, you can add:

Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>

Thanks,

Alex
  
Sunil V L Oct. 4, 2023, 10:33 a.m. UTC | #3
Hi Alex,

On Tue, Oct 03, 2023 at 08:53:12PM +0200, Alexandre Ghiti wrote:
> Hi Sunil,
> 
> On Wed, Sep 27, 2023 at 7:00 PM Sunil V L <sunilvl@ventanamicro.com> wrote:
> >
> > Enhance the acpi_os_ioremap() to support opregions in MMIO space. Also,
> > have strict checks using EFI memory map to allow remapping the RAM similar
> > to arm64.
> >
> > Signed-off-by: Sunil V L <sunilvl@ventanamicro.com>
> > Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> > ---
> >  arch/riscv/Kconfig       |  1 +
> >  arch/riscv/kernel/acpi.c | 87 +++++++++++++++++++++++++++++++++++++++-
> >  2 files changed, 86 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > index d607ab0f7c6d..ac039cf8af7a 100644
> > --- a/arch/riscv/Kconfig
> > +++ b/arch/riscv/Kconfig
> > @@ -39,6 +39,7 @@ config RISCV
> >         select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
> >         select ARCH_HAS_UBSAN_SANITIZE_ALL
> >         select ARCH_HAS_VDSO_DATA
> > +       select ARCH_KEEP_MEMBLOCK
> 
> Shouldn't we restrict this to ACPI?
> 
Sure, Let me update.

> >         select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
> >         select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT
> >         select ARCH_STACKWALK
> > diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c
> > index 56cb2c986c48..e619edc8b0cc 100644
> > --- a/arch/riscv/kernel/acpi.c
> > +++ b/arch/riscv/kernel/acpi.c
> > @@ -14,9 +14,10 @@
> >   */
> >
> >  #include <linux/acpi.h>
> > +#include <linux/efi.h>
> >  #include <linux/io.h>
> > +#include <linux/memblock.h>
> >  #include <linux/pci.h>
> > -#include <linux/efi.h>
> >
> >  int acpi_noirq = 1;            /* skip ACPI IRQ initialization */
> >  int acpi_disabled = 1;
> > @@ -217,7 +218,89 @@ void __init __acpi_unmap_table(void __iomem *map, unsigned long size)
> >
> >  void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
> >  {
> > -       return (void __iomem *)memremap(phys, size, MEMREMAP_WB);
> > +       efi_memory_desc_t *md, *region = NULL;
> > +       pgprot_t prot;
> > +
> > +       if (WARN_ON_ONCE(!efi_enabled(EFI_MEMMAP)))
> > +               return NULL;
> > +
> > +       for_each_efi_memory_desc(md) {
> > +               u64 end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
> > +
> > +               if (phys < md->phys_addr || phys >= end)
> > +                       continue;
> > +
> > +               if (phys + size > end) {
> > +                       pr_warn(FW_BUG "requested region covers multiple EFI memory regions\n");
> > +                       return NULL;
> > +               }
> > +               region = md;
> > +               break;
> > +       }
> > +
> > +       /*
> > +        * It is fine for AML to remap regions that are not represented in the
> > +        * EFI memory map at all, as it only describes normal memory, and MMIO
> > +        * regions that require a virtual mapping to make them accessible to
> > +        * the EFI runtime services.
> > +        */
> > +       prot = PAGE_KERNEL_IO;
> > +       if (region) {
> > +               switch (region->type) {
> > +               case EFI_LOADER_CODE:
> > +               case EFI_LOADER_DATA:
> > +               case EFI_BOOT_SERVICES_CODE:
> > +               case EFI_BOOT_SERVICES_DATA:
> > +               case EFI_CONVENTIONAL_MEMORY:
> > +               case EFI_PERSISTENT_MEMORY:
> > +                       if (memblock_is_map_memory(phys) ||
> > +                           !memblock_is_region_memory(phys, size)) {
> > +                               pr_warn(FW_BUG "requested region covers kernel memory\n");
> > +                               return NULL;
> > +                       }
> > +
> > +                       /*
> > +                        * Mapping kernel memory is permitted if the region in
> > +                        * question is covered by a single memblock with the
> > +                        * NOMAP attribute set: this enables the use of ACPI
> > +                        * table overrides passed via initramfs.
> > +                        * This particular use case only requires read access.
> > +                        */
> > +                       fallthrough;
> > +
> > +               case EFI_RUNTIME_SERVICES_CODE:
> > +                       /*
> > +                        * This would be unusual, but not problematic per se,
> > +                        * as long as we take care not to create a writable
> > +                        * mapping for executable code.
> > +                        */
> > +                       prot = PAGE_KERNEL_RO;
> > +                       break;
> > +
> > +               case EFI_ACPI_RECLAIM_MEMORY:
> > +                       /*
> > +                        * ACPI reclaim memory is used to pass firmware tables
> > +                        * and other data that is intended for consumption by
> > +                        * the OS only, which may decide it wants to reclaim
> > +                        * that memory and use it for something else. We never
> > +                        * do that, but we usually add it to the linear map
> > +                        * anyway, in which case we should use the existing
> > +                        * mapping.
> > +                        */
> > +                       if (memblock_is_map_memory(phys))
> > +                               return (void __iomem *)__va(phys);
> > +                       fallthrough;
> > +
> > +               default:
> > +                       if (region->attribute & EFI_MEMORY_WB)
> > +                               prot = PAGE_KERNEL;
> > +                       else if ((region->attribute & EFI_MEMORY_WC) ||
> > +                                (region->attribute & EFI_MEMORY_WT))
> > +                               prot = pgprot_writecombine(PAGE_KERNEL);
> 
> I have to ask: why is write-through mapped to write-combined here?
> 
IIUC, write-through ensures the copy in the cache and memory are always
in sync. So, instead of using WB, non-cacheable WC is used as RISC-V
doesn't really define these attributes. Let me know if this is not
correct.

> > +               }
> > +       }
> > +
> > +       return ioremap_prot(phys, size, pgprot_val(prot));
> >  }
> >
> >  #ifdef CONFIG_PCI
> > --
> > 2.39.2
> >
> 
> Like Andrew said in v1, too bad we can't merge that with arm64 instead
> of duplicating.
> 
I agree. But since acpi_os_ioremap() is supposed to be arch function, I
kept is separate. Also, I need feedback from Ard whether we should make
it common and where to add this common function.

> But otherwise, you can add:
> 
> Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> 

Thanks!
Sunil
  

Patch

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index d607ab0f7c6d..ac039cf8af7a 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -39,6 +39,7 @@  config RISCV
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
 	select ARCH_HAS_VDSO_DATA
+	select ARCH_KEEP_MEMBLOCK
 	select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
 	select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT
 	select ARCH_STACKWALK
diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c
index 56cb2c986c48..e619edc8b0cc 100644
--- a/arch/riscv/kernel/acpi.c
+++ b/arch/riscv/kernel/acpi.c
@@ -14,9 +14,10 @@ 
  */
 
 #include <linux/acpi.h>
+#include <linux/efi.h>
 #include <linux/io.h>
+#include <linux/memblock.h>
 #include <linux/pci.h>
-#include <linux/efi.h>
 
 int acpi_noirq = 1;		/* skip ACPI IRQ initialization */
 int acpi_disabled = 1;
@@ -217,7 +218,89 @@  void __init __acpi_unmap_table(void __iomem *map, unsigned long size)
 
 void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
 {
-	return (void __iomem *)memremap(phys, size, MEMREMAP_WB);
+	efi_memory_desc_t *md, *region = NULL;
+	pgprot_t prot;
+
+	if (WARN_ON_ONCE(!efi_enabled(EFI_MEMMAP)))
+		return NULL;
+
+	for_each_efi_memory_desc(md) {
+		u64 end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
+
+		if (phys < md->phys_addr || phys >= end)
+			continue;
+
+		if (phys + size > end) {
+			pr_warn(FW_BUG "requested region covers multiple EFI memory regions\n");
+			return NULL;
+		}
+		region = md;
+		break;
+	}
+
+	/*
+	 * It is fine for AML to remap regions that are not represented in the
+	 * EFI memory map at all, as it only describes normal memory, and MMIO
+	 * regions that require a virtual mapping to make them accessible to
+	 * the EFI runtime services.
+	 */
+	prot = PAGE_KERNEL_IO;
+	if (region) {
+		switch (region->type) {
+		case EFI_LOADER_CODE:
+		case EFI_LOADER_DATA:
+		case EFI_BOOT_SERVICES_CODE:
+		case EFI_BOOT_SERVICES_DATA:
+		case EFI_CONVENTIONAL_MEMORY:
+		case EFI_PERSISTENT_MEMORY:
+			if (memblock_is_map_memory(phys) ||
+			    !memblock_is_region_memory(phys, size)) {
+				pr_warn(FW_BUG "requested region covers kernel memory\n");
+				return NULL;
+			}
+
+			/*
+			 * Mapping kernel memory is permitted if the region in
+			 * question is covered by a single memblock with the
+			 * NOMAP attribute set: this enables the use of ACPI
+			 * table overrides passed via initramfs.
+			 * This particular use case only requires read access.
+			 */
+			fallthrough;
+
+		case EFI_RUNTIME_SERVICES_CODE:
+			/*
+			 * This would be unusual, but not problematic per se,
+			 * as long as we take care not to create a writable
+			 * mapping for executable code.
+			 */
+			prot = PAGE_KERNEL_RO;
+			break;
+
+		case EFI_ACPI_RECLAIM_MEMORY:
+			/*
+			 * ACPI reclaim memory is used to pass firmware tables
+			 * and other data that is intended for consumption by
+			 * the OS only, which may decide it wants to reclaim
+			 * that memory and use it for something else. We never
+			 * do that, but we usually add it to the linear map
+			 * anyway, in which case we should use the existing
+			 * mapping.
+			 */
+			if (memblock_is_map_memory(phys))
+				return (void __iomem *)__va(phys);
+			fallthrough;
+
+		default:
+			if (region->attribute & EFI_MEMORY_WB)
+				prot = PAGE_KERNEL;
+			else if ((region->attribute & EFI_MEMORY_WC) ||
+				 (region->attribute & EFI_MEMORY_WT))
+				prot = pgprot_writecombine(PAGE_KERNEL);
+		}
+	}
+
+	return ioremap_prot(phys, size, pgprot_val(prot));
 }
 
 #ifdef CONFIG_PCI