dma-contiguous: support per-numa CMA for all architectures

Message ID 20230512094210.141540-1-yajun.deng@linux.dev
State New
Headers
Series dma-contiguous: support per-numa CMA for all architectures |

Commit Message

Yajun Deng May 12, 2023, 9:42 a.m. UTC
  In commit b7176c261cdb ("dma-contiguous: provide the ability to
reserve per-numa CMA"), Barry added DMA_PERNUMA_CMA for ARM64.

But this feature is architecture independent, so support per-numa CMA
for all architectures, and enable it by default if NUMA is enabled.

Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
---
 Documentation/admin-guide/kernel-parameters.txt | 2 +-
 arch/arm64/mm/init.c                            | 2 --
 include/linux/dma-map-ops.h                     | 6 ------
 kernel/dma/Kconfig                              | 6 +++---
 kernel/dma/contiguous.c                         | 8 +++++++-
 5 files changed, 11 insertions(+), 13 deletions(-)
  

Comments

Andrew Morton May 12, 2023, 9:08 p.m. UTC | #1
On Fri, 12 May 2023 17:42:10 +0800 Yajun Deng <yajun.deng@linux.dev> wrote:

> In the commit b7176c261cdb ("dma-contiguous: provide the ability to
> reserve per-numa CMA"), Barry adds DMA_PERNUMA_CMA for ARM64.
> 
> But this feature is architecture independent, so support per-numa CMA
> for all architectures, and enable it by default if NUMA.
> 
> ...
>
> --- a/include/linux/dma-map-ops.h
> +++ b/include/linux/dma-map-ops.h
> @@ -168,12 +168,6 @@ static inline void dma_free_contiguous(struct device *dev, struct page *page,
>  }
>  #endif /* CONFIG_DMA_CMA*/
>  
> -#ifdef CONFIG_DMA_PERNUMA_CMA
> -void dma_pernuma_cma_reserve(void);
> -#else
> -static inline void dma_pernuma_cma_reserve(void) { }

It would be a little nicer to retain this line.

> -#endif /* CONFIG_DMA_PERNUMA_CMA */
> -
>  #ifdef CONFIG_DMA_DECLARE_COHERENT
>  int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
>  		dma_addr_t device_addr, size_t size);
>
> ...
>
> --- a/kernel/dma/contiguous.c
> +++ b/kernel/dma/contiguous.c
> @@ -128,7 +128,7 @@ static inline __maybe_unused phys_addr_t cma_early_percent_memory(void)
>  #endif
>  
>  #ifdef CONFIG_DMA_PERNUMA_CMA
> -void __init dma_pernuma_cma_reserve(void)
> +static void __init dma_pernuma_cma_reserve(void)
>  {
>  	int nid;
>  
> @@ -153,6 +153,10 @@ void __init dma_pernuma_cma_reserve(void)
>  			(unsigned long long)pernuma_size_bytes / SZ_1M, nid);
>  	}
>  }
> +#else
> +static inline void __init dma_pernuma_cma_reserve(void)
> +{
> +}
>  #endif

And to not add this function?
  
Christoph Hellwig May 15, 2023, 9:49 a.m. UTC | #2
This looks fine to me.  Can you please work with Barry to make sure
the slightly different place of the initcall doesn't break anything
for his setup?  I doubt it would, but I'd rather have a Tested-by:
tag.
  
Yajun Deng May 15, 2023, 11:23 a.m. UTC | #3
May 15, 2023 5:49 PM, "Christoph Hellwig" <hch@lst.de> wrote:

> This looks fine to me. Can you please work with Barry to make sure
> the slight different place of the initcall doesn't break anything
> for his setup? I doubt it would, but I'd rather have a Tested-by:
> tag.

Barry's email is no longer in use. I can't reach him.
  
Petr Tesařík May 15, 2023, 11:38 a.m. UTC | #4
On Mon, 15 May 2023 11:23:27 +0000
"Yajun Deng" <yajun.deng@linux.dev> wrote:

> May 15, 2023 5:49 PM, "Christoph Hellwig" <hch@lst.de> wrote:
> 
> > This looks fine to me. Can you please work with Barry to make sure
> > the slight different place of the initcall doesn't break anything
> > for his setup? I doubt it would, but I'd rather have a Tested-by:
> > tag.  
> 
> Barry's email is no longer in use. I can't reach him.

Which one? I would hope that his Gmail account is still valid:

  Barry Song <21cnbao@gmail.com>

Petr T
  
Yajun Deng May 16, 2023, 1:55 a.m. UTC | #5
Hello Barry,

This patch changes the caller of dma_pernuma_cma_reserve() from

bootmem_init() to dma_contiguous_reserve(). Do you think

this could cause any problems?

On 2023/5/12 17:42, Yajun Deng wrote:
> In the commit b7176c261cdb ("dma-contiguous: provide the ability to
> reserve per-numa CMA"), Barry adds DMA_PERNUMA_CMA for ARM64.
>
> But this feature is architecture independent, so support per-numa CMA
> for all architectures, and enable it by default if NUMA.
>
> Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
> ---
>   Documentation/admin-guide/kernel-parameters.txt | 2 +-
>   arch/arm64/mm/init.c                            | 2 --
>   include/linux/dma-map-ops.h                     | 6 ------
>   kernel/dma/Kconfig                              | 6 +++---
>   kernel/dma/contiguous.c                         | 8 +++++++-
>   5 files changed, 11 insertions(+), 13 deletions(-)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index 56d9458276a6..ac0002b2e323 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -692,7 +692,7 @@
>   			kernel/dma/contiguous.c
>   
>   	cma_pernuma=nn[MG]
> -			[ARM64,KNL,CMA]
> +			[KNL,CMA]
>   			Sets the size of kernel per-numa memory area for
>   			contiguous memory allocations. A value of 0 disables
>   			per-numa CMA altogether. And If this option is not
> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index 66e70ca47680..d560aef6aafa 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -410,8 +410,6 @@ void __init bootmem_init(void)
>   	arm64_hugetlb_cma_reserve();
>   #endif
>   
> -	dma_pernuma_cma_reserve();
> -
>   	kvm_hyp_reserve();
>   
>   	/*
> diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
> index 31f114f486c4..7af9949828ff 100644
> --- a/include/linux/dma-map-ops.h
> +++ b/include/linux/dma-map-ops.h
> @@ -168,12 +168,6 @@ static inline void dma_free_contiguous(struct device *dev, struct page *page,
>   }
>   #endif /* CONFIG_DMA_CMA*/
>   
> -#ifdef CONFIG_DMA_PERNUMA_CMA
> -void dma_pernuma_cma_reserve(void);
> -#else
> -static inline void dma_pernuma_cma_reserve(void) { }
> -#endif /* CONFIG_DMA_PERNUMA_CMA */
> -
>   #ifdef CONFIG_DMA_DECLARE_COHERENT
>   int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
>   		dma_addr_t device_addr, size_t size);
> diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
> index 6677d0e64d27..79f83091e3a2 100644
> --- a/kernel/dma/Kconfig
> +++ b/kernel/dma/Kconfig
> @@ -140,10 +140,10 @@ if  DMA_CMA
>   
>   config DMA_PERNUMA_CMA
>   	bool "Enable separate DMA Contiguous Memory Area for each NUMA Node"
> -	default NUMA && ARM64
> +	default NUMA
>   	help
> -	  Enable this option to get pernuma CMA areas so that devices like
> -	  ARM64 SMMU can get local memory by DMA coherent APIs.
> +	  Enable this option to get pernuma CMA areas so that NUMA devices
> +	  can get local memory by DMA coherent APIs.
>   
>   	  You can set the size of pernuma CMA by specifying "cma_pernuma=size"
>   	  on the kernel's command line.
> diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
> index 6ea80ae42622..26a8e5365fcd 100644
> --- a/kernel/dma/contiguous.c
> +++ b/kernel/dma/contiguous.c
> @@ -128,7 +128,7 @@ static inline __maybe_unused phys_addr_t cma_early_percent_memory(void)
>   #endif
>   
>   #ifdef CONFIG_DMA_PERNUMA_CMA
> -void __init dma_pernuma_cma_reserve(void)
> +static void __init dma_pernuma_cma_reserve(void)
>   {
>   	int nid;
>   
> @@ -153,6 +153,10 @@ void __init dma_pernuma_cma_reserve(void)
>   			(unsigned long long)pernuma_size_bytes / SZ_1M, nid);
>   	}
>   }
> +#else
> +static inline void __init dma_pernuma_cma_reserve(void)
> +{
> +}
>   #endif
>   
>   /**
> @@ -171,6 +175,8 @@ void __init dma_contiguous_reserve(phys_addr_t limit)
>   	phys_addr_t selected_limit = limit;
>   	bool fixed = false;
>   
> +	dma_pernuma_cma_reserve();
> +
>   	pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit);
>   
>   	if (size_cmdline != -1) {
  
Yajun Deng May 16, 2023, 1:57 a.m. UTC | #6
On 2023/5/15 19:38, Petr Tesařík wrote:
> On Mon, 15 May 2023 11:23:27 +0000
> "Yajun Deng" <yajun.deng@linux.dev> wrote:
>
>> May 15, 2023 5:49 PM, "Christoph Hellwig" <hch@lst.de> wrote:
>>
>>> This looks fine to me. Can you please work with Barry to make sure
>>> the slight different place of the initcall doesn't break anything
>>> for his setup? I doubt it would, but I'd rather have a Tested-by:
>>> tag.
>> Barry's email is no longer in use. I can't reach him.
> Which one? I would hope that his Gmail account is still valid:
>
>    Barry Song <21cnbao@gmail.com>
   Thanks.
>
> Petr T
  
Andrew Morton June 24, 2023, 12:40 a.m. UTC | #7
On Mon, 15 May 2023 13:38:21 +0200 Petr Tesařík <petr@tesarici.cz> wrote:

> On Mon, 15 May 2023 11:23:27 +0000
> "Yajun Deng" <yajun.deng@linux.dev> wrote:
> 
> > May 15, 2023 5:49 PM, "Christoph Hellwig" <hch@lst.de> wrote:
> > 
> > > This looks fine to me. Can you please work with Barry to make sure
> > > the slight different place of the initcall doesn't break anything
> > > for his setup? I doubt it would, but I'd rather have a Tested-by:
> > > tag.  
> > 
> > Barry's email is no longer in use. I can't reach him.
> 
> Which one? I would hope that his Gmail account is still valid:
> 
>   Barry Song <21cnbao@gmail.com>
> 

Maybe his kernel.org address works...

I have this patch stuck in limbo for 6.4.  I guess I'll carry it over
into the next -rc cycle, see what happens.

fwiw, it has been in -next for six weeks, no known issues.
  
Yajun Deng June 25, 2023, 7:30 a.m. UTC | #8
June 24, 2023 8:40 AM, "Andrew Morton" <akpm@linux-foundation.org> wrote:

> On Mon, 15 May 2023 13:38:21 +0200 Petr Tesařík <petr@tesarici.cz> wrote:
> 
>> On Mon, 15 May 2023 11:23:27 +0000
>> "Yajun Deng" <yajun.deng@linux.dev> wrote:
>> 
>> May 15, 2023 5:49 PM, "Christoph Hellwig" <hch@lst.de> wrote:
> 
> This looks fine to me. Can you please work with Barry to make sure
> the slight different place of the initcall doesn't break anything
> for his setup? I doubt it would, but I'd rather have a Tested-by:
> tag.
>> Barry's email is no longer in use. I can't reach him.
>> 
>> Which one? I would hope that his Gmail account is still valid:
>> 
>> Barry Song <21cnbao@gmail.com>
> 
> Maybe his kernel.org address works...
> 
> I have this patch stuck in limbo for 6.4. I guess I'll carry it over
> into the next -rc cycle, see what happens.
> 
> fwiw, it has been in -next for six weeks, no known issues.

Hi Barry, does the slightly different place of the initcall break anything?
  
Barry Song June 26, 2023, 5:32 a.m. UTC | #9
On Sun, Jun 25, 2023 at 7:30 PM Yajun Deng <yajun.deng@linux.dev> wrote:
>
> June 24, 2023 8:40 AM, "Andrew Morton" <akpm@linux-foundation.org> wrote:
>
> > On Mon, 15 May 2023 13:38:21 +0200 Petr Tesařík <petr@tesarici.cz> wrote:
> >
> >> On Mon, 15 May 2023 11:23:27 +0000
> >> "Yajun Deng" <yajun.deng@linux.dev> wrote:
> >>
> >> May 15, 2023 5:49 PM, "Christoph Hellwig" <hch@lst.de> wrote:
> >
> > This looks fine to me. Can you please work with Barry to make sure
> > the slight different place of the initcall doesn't break anything
> > for his setup? I doubt it would, but I'd rather have a Tested-by:
> > tag.
> >> Barry's email is no longer in use. I can't reach him.
> >>
> >> Which one? I would hope that his Gmail account is still valid:
> >>
> >> Barry Song <21cnbao@gmail.com>
> >
> > Maybe his kernel.org address works...
> >
> > I have this patch stuck in limbo for 6.4. I guess I'll carry it over
> > into the next -rc cycle, see what happens.
> >
> > fwiw, it has been in -next for six weeks, no known issues.
>
> Hi, Barry, The slight different place of the initcall, does break anything?

I don't see a fundamental difference, as it is still called after
arch_numa_init(),
which is really what we depend on.

I also ran a test on QEMU with the following command line:
qemu-system-aarch64 -M virt,gic-version=3 -nographic \
 -smp cpus=8 \
 -numa node,cpus=0-1,nodeid=0 \
 -numa node,cpus=2-3,nodeid=1 \
 -numa node,cpus=4-5,nodeid=2 \
 -numa node,cpus=6-7,nodeid=3 \
 -numa dist,src=0,dst=1,val=12 \
 -numa dist,src=0,dst=2,val=20 \
 -numa dist,src=0,dst=3,val=22 \
 -numa dist,src=1,dst=2,val=22 \
 -numa dist,src=2,dst=3,val=12 \
 -numa dist,src=1,dst=3,val=24 \
 -m 4096M -cpu cortex-a57 -kernel arch/arm64/boot/Image \
 -nographic -append "cma_pernuma=32M root=/dev/vda2  rw ip=dhcp
sched_debug irqchip.gicv3_pseudo_nmi=1" \
 -drive if=none,file=extra/ubuntu16.04-arm64.img,id=hd0 -device
virtio-blk-device,drive=hd0 \
 -net nic -net user,hostfwd=tcp::2222-:22

In the booted system, I can see that all CMA areas are correctly reserved:
~# dmesg | grep cma
[    0.000000] cma: cma_declare_contiguous_nid(size
0x0000000002000000, base 0x0000000000000000, limit 0x0000000000000000
alignment 0x0000000000000000)
[    0.000000] cma: Reserved 32 MiB at 0x000000007ce00000
[    0.000000] cma: dma_pernuma_cma_reserve: reserved 32 MiB on node 0
[    0.000000] cma: cma_declare_contiguous_nid(size
0x0000000002000000, base 0x0000000000000000, limit 0x0000000000000000
alignment 0x0000000000000000)
[    0.000000] cma: Reserved 32 MiB at 0x00000000bce00000
[    0.000000] cma: dma_pernuma_cma_reserve: reserved 32 MiB on node 1
[    0.000000] cma: cma_declare_contiguous_nid(size
0x0000000002000000, base 0x0000000000000000, limit 0x0000000000000000
alignment 0x0000000000000000)
[    0.000000] cma: Reserved 32 MiB at 0x00000000fce00000
[    0.000000] cma: dma_pernuma_cma_reserve: reserved 32 MiB on node 2
[    0.000000] cma: cma_declare_contiguous_nid(size
0x0000000002000000, base 0x0000000000000000, limit 0x0000000000000000
alignment 0x0000000000000000)
[    0.000000] cma: Reserved 32 MiB at 0x0000000100000000
[    0.000000] cma: dma_pernuma_cma_reserve: reserved 32 MiB on node 3
[    0.000000] cma: dma_contiguous_reserve(limit 100000000)
[    0.000000] cma: dma_contiguous_reserve: reserving 32 MiB for global area
[    0.000000] cma: cma_declare_contiguous_nid(size
0x0000000002000000, base 0x0000000000000000, limit 0x0000000100000000
alignment 0x0000000000000000)
[    0.000000] cma: Reserved 32 MiB at 0x00000000fae00000
[    0.000000] Kernel command line: cma_pernuma=32M root=/dev/vda2  rw
ip=dhcp sched_debug irqchip.gicv3_pseudo_nmi=1
[    0.000000] Memory: 3848784K/4194304K available (16128K kernel
code, 4152K rwdata, 10244K rodata, 8512K init, 612K bss, 181680K
reserved, 163840K cma-reserved)
[    0.175309] cma: cma_alloc(cma (____ptrval____), count 128, align 7)
[    0.179264] cma: cma_alloc(): returned (____ptrval____)
[    0.179869] cma: cma_alloc(cma (____ptrval____), count 128, align 7)
[    0.180027] cma: cma_alloc(): returned (____ptrval____)
[    0.180187] cma: cma_alloc(cma (____ptrval____), count 128, align 7)
[    0.180374] cma: cma_alloc(): returned (____ptrval____)

So my feeling is that this patch is fine. However, I would prefer that Yicong
and Tiantao, who have a real NUMA machine, test it as well,
so that we can get some real device drivers to call the DMA APIs to allocate
memory from per-NUMA CMA on arm64,
even though it is 99.9% OK.

With their testing done, please feel free to add
Acked-by: Barry Song <baohua@kernel.org>

Thanks
Barry
  
Christoph Hellwig July 31, 2023, 4:02 p.m. UTC | #10
Thanks,

applied to the dma-mapping tree for 6.6.
  

Patch

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 56d9458276a6..ac0002b2e323 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -692,7 +692,7 @@ 
 			kernel/dma/contiguous.c
 
 	cma_pernuma=nn[MG]
-			[ARM64,KNL,CMA]
+			[KNL,CMA]
 			Sets the size of kernel per-numa memory area for
 			contiguous memory allocations. A value of 0 disables
 			per-numa CMA altogether. And If this option is not
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 66e70ca47680..d560aef6aafa 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -410,8 +410,6 @@  void __init bootmem_init(void)
 	arm64_hugetlb_cma_reserve();
 #endif
 
-	dma_pernuma_cma_reserve();
-
 	kvm_hyp_reserve();
 
 	/*
diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
index 31f114f486c4..7af9949828ff 100644
--- a/include/linux/dma-map-ops.h
+++ b/include/linux/dma-map-ops.h
@@ -168,12 +168,6 @@  static inline void dma_free_contiguous(struct device *dev, struct page *page,
 }
 #endif /* CONFIG_DMA_CMA*/
 
-#ifdef CONFIG_DMA_PERNUMA_CMA
-void dma_pernuma_cma_reserve(void);
-#else
-static inline void dma_pernuma_cma_reserve(void) { }
-#endif /* CONFIG_DMA_PERNUMA_CMA */
-
 #ifdef CONFIG_DMA_DECLARE_COHERENT
 int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
 		dma_addr_t device_addr, size_t size);
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 6677d0e64d27..79f83091e3a2 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -140,10 +140,10 @@  if  DMA_CMA
 
 config DMA_PERNUMA_CMA
 	bool "Enable separate DMA Contiguous Memory Area for each NUMA Node"
-	default NUMA && ARM64
+	default NUMA
 	help
-	  Enable this option to get pernuma CMA areas so that devices like
-	  ARM64 SMMU can get local memory by DMA coherent APIs.
+	  Enable this option to get pernuma CMA areas so that NUMA devices
+	  can get local memory by DMA coherent APIs.
 
 	  You can set the size of pernuma CMA by specifying "cma_pernuma=size"
 	  on the kernel's command line.
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index 6ea80ae42622..26a8e5365fcd 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -128,7 +128,7 @@  static inline __maybe_unused phys_addr_t cma_early_percent_memory(void)
 #endif
 
 #ifdef CONFIG_DMA_PERNUMA_CMA
-void __init dma_pernuma_cma_reserve(void)
+static void __init dma_pernuma_cma_reserve(void)
 {
 	int nid;
 
@@ -153,6 +153,10 @@  void __init dma_pernuma_cma_reserve(void)
 			(unsigned long long)pernuma_size_bytes / SZ_1M, nid);
 	}
 }
+#else
+static inline void __init dma_pernuma_cma_reserve(void)
+{
+}
 #endif
 
 /**
@@ -171,6 +175,8 @@  void __init dma_contiguous_reserve(phys_addr_t limit)
 	phys_addr_t selected_limit = limit;
 	bool fixed = false;
 
+	dma_pernuma_cma_reserve();
+
 	pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit);
 
 	if (size_cmdline != -1) {