[v4] mm: pass nid to reserve_bootmem_region()

Message ID 20230619023406.424298-1-yajun.deng@linux.dev
State New
Headers
Series [v4] mm: pass nid to reserve_bootmem_region() |

Commit Message

Yajun Deng June 19, 2023, 2:34 a.m. UTC
  early_pfn_to_nid() is called frequently in init_reserved_page(), it
returns the node id of the PFN. These PFN are probably from the same
memory region, they have the same node id. It's not necessary to call
early_pfn_to_nid() for each PFN.

Pass nid to reserve_bootmem_region() and drop the call to
early_pfn_to_nid() in init_reserved_page(). Also, set nid on all
reserved pages before doing this, as some reserved memory regions may
not be set nid.

The most beneficial function is memmap_init_reserved_pages() if
CONFIG_DEFERRED_STRUCT_PAGE_INIT is enabled.

The following data was tested on an x86 machine with 190GB of RAM.

before:
memmap_init_reserved_pages()  67ms

after:
memmap_init_reserved_pages()  20ms

Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202306160145.juJMr3Bi-lkp@intel.com
---
V3 -> V4: make the test for early_page_initialised() inside if
	  (IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT))
V2 -> V3: set nid on all reserved pages before pass nid.
V1 -> V2: fix build error when CONFIG_NUMA is not enabled.
---
 include/linux/mm.h |  3 ++-
 mm/memblock.c      | 31 +++++++++++++++++++++----------
 mm/mm_init.c       | 30 +++++++++++++++++-------------
 3 files changed, 40 insertions(+), 24 deletions(-)
  

Comments

Mike Rapoport June 20, 2023, 8:45 a.m. UTC | #1
On Mon, Jun 19, 2023 at 10:34:06AM +0800, Yajun Deng wrote:
> early_pfn_to_nid() is called frequently in init_reserved_page(), it
> returns the node id of the PFN. These PFN are probably from the same
> memory region, they have the same node id. It's not necessary to call
> early_pfn_to_nid() for each PFN.
> 
> Pass nid to reserve_bootmem_region() and drop the call to
> early_pfn_to_nid() in init_reserved_page(). Also, set nid on all
> reserved pages before doing this, as some reserved memory regions may
> not be set nid.
> 
> The most beneficial function is memmap_init_reserved_pages() if
> CONFIG_DEFERRED_STRUCT_PAGE_INIT is enabled.
> 
> The following data was tested on an x86 machine with 190GB of RAM.
> 
> before:
> memmap_init_reserved_pages()  67ms
> 
> after:
> memmap_init_reserved_pages()  20ms
> 
> Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
> Reported-by: kernel test robot <lkp@intel.com>
> Closes: https://lore.kernel.org/oe-kbuild-all/202306160145.juJMr3Bi-lkp@intel.com

Reviewed-by: Mike Rapoport (IBM) <rppt@kernel.org>

> ---
> V3 -> V4: make the test for early_page_initialised() inside if
> 	  (IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT))
> V2 -> V3: set nid on all reserved pages before pass nid.
> V1 -> V2: fix build error when CONFIG_NUMA is not enabled.
> ---
>  include/linux/mm.h |  3 ++-
>  mm/memblock.c      | 31 +++++++++++++++++++++----------
>  mm/mm_init.c       | 30 +++++++++++++++++-------------
>  3 files changed, 40 insertions(+), 24 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index fdd966b11f79..a7a0e692d44d 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2960,7 +2960,8 @@ extern unsigned long free_reserved_area(void *start, void *end,
>  
>  extern void adjust_managed_page_count(struct page *page, long count);
>  
> -extern void reserve_bootmem_region(phys_addr_t start, phys_addr_t end);
> +extern void reserve_bootmem_region(phys_addr_t start,
> +				   phys_addr_t end, int nid);
>  
>  /* Free the reserved page into the buddy system, so it gets managed. */
>  static inline void free_reserved_page(struct page *page)
> diff --git a/mm/memblock.c b/mm/memblock.c
> index ff0da1858778..f9e61e565a53 100644
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
> @@ -2091,19 +2091,30 @@ static void __init memmap_init_reserved_pages(void)
>  {
>  	struct memblock_region *region;
>  	phys_addr_t start, end;
> -	u64 i;
> +	int nid;
> +
> +	/*
> +	 * set nid on all reserved pages and also treat struct
> +	 * pages for the NOMAP regions as PageReserved
> +	 */
> +	for_each_mem_region(region) {
> +		nid = memblock_get_region_node(region);
> +		start = region->base;
> +		end = start + region->size;
> +
> +		if (memblock_is_nomap(region))
> +			reserve_bootmem_region(start, end, nid);
> +
> +		memblock_set_node(start, end, &memblock.reserved, nid);
> +	}
>  
>  	/* initialize struct pages for the reserved regions */
> -	for_each_reserved_mem_range(i, &start, &end)
> -		reserve_bootmem_region(start, end);
> +	for_each_reserved_mem_region(region) {
> +		nid = memblock_get_region_node(region);
> +		start = region->base;
> +		end = start + region->size;
>  
> -	/* and also treat struct pages for the NOMAP regions as PageReserved */
> -	for_each_mem_region(region) {
> -		if (memblock_is_nomap(region)) {
> -			start = region->base;
> -			end = start + region->size;
> -			reserve_bootmem_region(start, end);
> -		}
> +		reserve_bootmem_region(start, end, nid);
>  	}
>  }
>  
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index d393631599a7..a1963c3322af 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -646,10 +646,8 @@ static inline void pgdat_set_deferred_range(pg_data_t *pgdat)
>  }
>  
>  /* Returns true if the struct page for the pfn is initialised */
> -static inline bool __meminit early_page_initialised(unsigned long pfn)
> +static inline bool __meminit early_page_initialised(unsigned long pfn, int nid)
>  {
> -	int nid = early_pfn_to_nid(pfn);
> -
>  	if (node_online(nid) && pfn >= NODE_DATA(nid)->first_deferred_pfn)
>  		return false;
>  
> @@ -695,15 +693,14 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
>  	return false;
>  }
>  
> -static void __meminit init_reserved_page(unsigned long pfn)
> +static void __meminit init_reserved_page(unsigned long pfn, int nid)
>  {
>  	pg_data_t *pgdat;
> -	int nid, zid;
> +	int zid;
>  
> -	if (early_page_initialised(pfn))
> +	if (early_page_initialised(pfn, nid))
>  		return;
>  
> -	nid = early_pfn_to_nid(pfn);
>  	pgdat = NODE_DATA(nid);
>  
>  	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
> @@ -717,7 +714,7 @@ static void __meminit init_reserved_page(unsigned long pfn)
>  #else
>  static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
>  
> -static inline bool early_page_initialised(unsigned long pfn)
> +static inline bool early_page_initialised(unsigned long pfn, int nid)
>  {
>  	return true;
>  }
> @@ -727,7 +724,7 @@ static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
>  	return false;
>  }
>  
> -static inline void init_reserved_page(unsigned long pfn)
> +static inline void init_reserved_page(unsigned long pfn, int nid)
>  {
>  }
>  #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
> @@ -738,7 +735,8 @@ static inline void init_reserved_page(unsigned long pfn)
>   * marks the pages PageReserved. The remaining valid pages are later
>   * sent to the buddy page allocator.
>   */
> -void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end)
> +void __meminit reserve_bootmem_region(phys_addr_t start,
> +				      phys_addr_t end, int nid)
>  {
>  	unsigned long start_pfn = PFN_DOWN(start);
>  	unsigned long end_pfn = PFN_UP(end);
> @@ -747,7 +745,7 @@ void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end)
>  		if (pfn_valid(start_pfn)) {
>  			struct page *page = pfn_to_page(start_pfn);
>  
> -			init_reserved_page(start_pfn);
> +			init_reserved_page(start_pfn, nid);
>  
>  			/* Avoid false-positive PageTail() */
>  			INIT_LIST_HEAD(&page->lru);
> @@ -2579,8 +2577,14 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
>  void __init memblock_free_pages(struct page *page, unsigned long pfn,
>  							unsigned int order)
>  {
> -	if (!early_page_initialised(pfn))
> -		return;
> +
> +	if (IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT)) {
> +		int nid = early_pfn_to_nid(pfn);
> +
> +		if (!early_page_initialised(pfn, nid))
> +			return;
> +	}
> +
>  	if (!kmsan_memblock_free_pages(page, order)) {
>  		/* KMSAN will take care of these pages. */
>  		return;
> -- 
> 2.25.1
>
  

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index fdd966b11f79..a7a0e692d44d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2960,7 +2960,8 @@  extern unsigned long free_reserved_area(void *start, void *end,
 
 extern void adjust_managed_page_count(struct page *page, long count);
 
-extern void reserve_bootmem_region(phys_addr_t start, phys_addr_t end);
+extern void reserve_bootmem_region(phys_addr_t start,
+				   phys_addr_t end, int nid);
 
 /* Free the reserved page into the buddy system, so it gets managed. */
 static inline void free_reserved_page(struct page *page)
diff --git a/mm/memblock.c b/mm/memblock.c
index ff0da1858778..f9e61e565a53 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -2091,19 +2091,30 @@  static void __init memmap_init_reserved_pages(void)
 {
 	struct memblock_region *region;
 	phys_addr_t start, end;
-	u64 i;
+	int nid;
+
+	/*
+	 * set nid on all reserved pages and also treat struct
+	 * pages for the NOMAP regions as PageReserved
+	 */
+	for_each_mem_region(region) {
+		nid = memblock_get_region_node(region);
+		start = region->base;
+		end = start + region->size;
+
+		if (memblock_is_nomap(region))
+			reserve_bootmem_region(start, end, nid);
+
+		memblock_set_node(start, end, &memblock.reserved, nid);
+	}
 
 	/* initialize struct pages for the reserved regions */
-	for_each_reserved_mem_range(i, &start, &end)
-		reserve_bootmem_region(start, end);
+	for_each_reserved_mem_region(region) {
+		nid = memblock_get_region_node(region);
+		start = region->base;
+		end = start + region->size;
 
-	/* and also treat struct pages for the NOMAP regions as PageReserved */
-	for_each_mem_region(region) {
-		if (memblock_is_nomap(region)) {
-			start = region->base;
-			end = start + region->size;
-			reserve_bootmem_region(start, end);
-		}
+		reserve_bootmem_region(start, end, nid);
 	}
 }
 
diff --git a/mm/mm_init.c b/mm/mm_init.c
index d393631599a7..a1963c3322af 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -646,10 +646,8 @@  static inline void pgdat_set_deferred_range(pg_data_t *pgdat)
 }
 
 /* Returns true if the struct page for the pfn is initialised */
-static inline bool __meminit early_page_initialised(unsigned long pfn)
+static inline bool __meminit early_page_initialised(unsigned long pfn, int nid)
 {
-	int nid = early_pfn_to_nid(pfn);
-
 	if (node_online(nid) && pfn >= NODE_DATA(nid)->first_deferred_pfn)
 		return false;
 
@@ -695,15 +693,14 @@  defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
 	return false;
 }
 
-static void __meminit init_reserved_page(unsigned long pfn)
+static void __meminit init_reserved_page(unsigned long pfn, int nid)
 {
 	pg_data_t *pgdat;
-	int nid, zid;
+	int zid;
 
-	if (early_page_initialised(pfn))
+	if (early_page_initialised(pfn, nid))
 		return;
 
-	nid = early_pfn_to_nid(pfn);
 	pgdat = NODE_DATA(nid);
 
 	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
@@ -717,7 +714,7 @@  static void __meminit init_reserved_page(unsigned long pfn)
 #else
 static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
 
-static inline bool early_page_initialised(unsigned long pfn)
+static inline bool early_page_initialised(unsigned long pfn, int nid)
 {
 	return true;
 }
@@ -727,7 +724,7 @@  static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
 	return false;
 }
 
-static inline void init_reserved_page(unsigned long pfn)
+static inline void init_reserved_page(unsigned long pfn, int nid)
 {
 }
 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
@@ -738,7 +735,8 @@  static inline void init_reserved_page(unsigned long pfn)
  * marks the pages PageReserved. The remaining valid pages are later
  * sent to the buddy page allocator.
  */
-void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end)
+void __meminit reserve_bootmem_region(phys_addr_t start,
+				      phys_addr_t end, int nid)
 {
 	unsigned long start_pfn = PFN_DOWN(start);
 	unsigned long end_pfn = PFN_UP(end);
@@ -747,7 +745,7 @@  void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end)
 		if (pfn_valid(start_pfn)) {
 			struct page *page = pfn_to_page(start_pfn);
 
-			init_reserved_page(start_pfn);
+			init_reserved_page(start_pfn, nid);
 
 			/* Avoid false-positive PageTail() */
 			INIT_LIST_HEAD(&page->lru);
@@ -2579,8 +2577,14 @@  void __init set_dma_reserve(unsigned long new_dma_reserve)
 void __init memblock_free_pages(struct page *page, unsigned long pfn,
 							unsigned int order)
 {
-	if (!early_page_initialised(pfn))
-		return;
+
+	if (IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT)) {
+		int nid = early_pfn_to_nid(pfn);
+
+		if (!early_page_initialised(pfn, nid))
+			return;
+	}
+
 	if (!kmsan_memblock_free_pages(page, order)) {
 		/* KMSAN will take care of these pages. */
 		return;