mm/page_alloc: Make deferred page init free pages in MAX_ORDER blocks

Message ID 20230317153501.19807-1-kirill.shutemov@linux.intel.com
State New
Headers
Series mm/page_alloc: Make deferred page init free pages in MAX_ORDER blocks |

Commit Message

Kirill A. Shutemov March 17, 2023, 3:35 p.m. UTC
  Normal page init path frees pages during the boot in MAX_ORDER chunks,
but deferred page init path does it in pageblock blocks.

Change deferred page init path to work in MAX_ORDER blocks.

For cases when pageblock is larger than MAX_ORDER, set migrate type to
MIGRATE_MOVABLE for all pageblocks covered by the page.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---

Note: the patch depends on the new definition of MAX_ORDER.

---
 include/linux/mmzone.h |  2 ++
 mm/page_alloc.c        | 19 ++++++++++---------
 2 files changed, 12 insertions(+), 9 deletions(-)
  

Comments

David Hildenbrand March 17, 2023, 5:50 p.m. UTC | #1
On 17.03.23 16:35, Kirill A. Shutemov wrote:
> Normal page init path frees pages during the boot in MAX_ORDER chunks,
> but deferred page init path does it in pageblock blocks.
> 
> Change deferred page init path to work in MAX_ORDER blocks.
> 
> For cases when pageblock is larger than MAX_ORDER, set migrate type to
> MIGRATE_MOVABLE for all pageblocks covered by the page.

See

commit b3d40a2b6d10c9d0424d2b398bf962fb6adad87e
Author: David Hildenbrand <david@redhat.com>
Date:   Tue Mar 22 14:43:20 2022 -0700

     mm: enforce pageblock_order < MAX_ORDER
     
     Some places in the kernel don't really expect pageblock_order >=
     MAX_ORDER, and it looks like this is only possible in corner cases:
     
     1) CONFIG_DEFERRED_STRUCT_PAGE_INIT we'll end up freeing pageblock_order
        pages via __free_pages_core(), which cannot possibly work.

     ...

How should it still happen?
  
Kirill A. Shutemov March 17, 2023, 8:08 p.m. UTC | #2
On Fri, Mar 17, 2023 at 06:50:17PM +0100, David Hildenbrand wrote:
> On 17.03.23 16:35, Kirill A. Shutemov wrote:
> > Normal page init path frees pages during the boot in MAX_ORDER chunks,
> > but deferred page init path does it in pageblock blocks.
> > 
> > Change deferred page init path to work in MAX_ORDER blocks.
> > 
> > For cases when pageblock is larger than MAX_ORDER, set migrate type to
> > MIGRATE_MOVABLE for all pageblocks covered by the page.
> 
> See
> 
> commit b3d40a2b6d10c9d0424d2b398bf962fb6adad87e
> Author: David Hildenbrand <david@redhat.com>
> Date:   Tue Mar 22 14:43:20 2022 -0700
> 
>     mm: enforce pageblock_order < MAX_ORDER
>     Some places in the kernel don't really expect pageblock_order >=
>     MAX_ORDER, and it looks like this is only possible in corner cases:
>     1) CONFIG_DEFERRED_STRUCT_PAGE_INIT we'll end up freeing pageblock_order
>        pages via __free_pages_core(), which cannot possibly work.
> 
>     ...
> 
> How should it still happen?

I got the sentence backwards. It is supposed to be

	For cases when MAX_ORDER is larger than pageblock, set migrate type to
	MIGRATE_MOVABLE for all pageblocks covered by the page.
  
Mel Gorman March 21, 2023, 4:44 p.m. UTC | #3
On Fri, Mar 17, 2023 at 06:35:01PM +0300, Kirill A. Shutemov wrote:
> Normal page init path frees pages during the boot in MAX_ORDER chunks,
> but deferred page init path does it in pageblock blocks.
> 
> Change deferred page init path to work in MAX_ORDER blocks.
> 
> For cases when pageblock is larger than MAX_ORDER, set migrate type to
> MIGRATE_MOVABLE for all pageblocks covered by the page.
> 

The problem with the sentence was pointed out already.

> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>

Otherwise;

Acked-by: Mel Gorman <mgorman@suse.de>
  

Patch

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 96599cb9eb62..f53fe3a7ca45 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -32,6 +32,8 @@ 
 #endif
 #define MAX_ORDER_NR_PAGES (1 << MAX_ORDER)
 
+#define IS_MAX_ORDER_ALIGNED(pfn) IS_ALIGNED(pfn, MAX_ORDER_NR_PAGES)
+
 /*
  * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed
  * costly to service.  That is between allocation orders which should
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 87d760236dba..fc02a243425d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1875,9 +1875,10 @@  static void __init deferred_free_range(unsigned long pfn,
 	page = pfn_to_page(pfn);
 
 	/* Free a large naturally-aligned chunk if possible */
-	if (nr_pages == pageblock_nr_pages && pageblock_aligned(pfn)) {
-		set_pageblock_migratetype(page, MIGRATE_MOVABLE);
-		__free_pages_core(page, pageblock_order);
+	if (nr_pages == MAX_ORDER_NR_PAGES && IS_MAX_ORDER_ALIGNED(pfn)) {
+		for (i = 0; i < nr_pages; i += pageblock_nr_pages)
+			set_pageblock_migratetype(page + i, MIGRATE_MOVABLE);
+		__free_pages_core(page, MAX_ORDER);
 		return;
 	}
 
@@ -1901,19 +1902,19 @@  static inline void __init pgdat_init_report_one_done(void)
 /*
  * Returns true if page needs to be initialized or freed to buddy allocator.
  *
- * We check if a current large page is valid by only checking the validity
+ * We check if a current MAX_ORDER block is valid by only checking the validity
  * of the head pfn.
  */
 static inline bool __init deferred_pfn_valid(unsigned long pfn)
 {
-	if (pageblock_aligned(pfn) && !pfn_valid(pfn))
+	if (IS_MAX_ORDER_ALIGNED(pfn) && !pfn_valid(pfn))
 		return false;
 	return true;
 }
 
 /*
  * Free pages to buddy allocator. Try to free aligned pages in
- * pageblock_nr_pages sizes.
+ * MAX_ORDER_NR_PAGES sizes.
  */
 static void __init deferred_free_pages(unsigned long pfn,
 				       unsigned long end_pfn)
@@ -1924,7 +1925,7 @@  static void __init deferred_free_pages(unsigned long pfn,
 		if (!deferred_pfn_valid(pfn)) {
 			deferred_free_range(pfn - nr_free, nr_free);
 			nr_free = 0;
-		} else if (pageblock_aligned(pfn)) {
+		} else if (IS_MAX_ORDER_ALIGNED(pfn)) {
 			deferred_free_range(pfn - nr_free, nr_free);
 			nr_free = 1;
 		} else {
@@ -1937,7 +1938,7 @@  static void __init deferred_free_pages(unsigned long pfn,
 
 /*
  * Initialize struct pages.  We minimize pfn page lookups and scheduler checks
- * by performing it only once every pageblock_nr_pages.
+ * by performing it only once every MAX_ORDER_NR_PAGES.
  * Return number of pages initialized.
  */
 static unsigned long  __init deferred_init_pages(struct zone *zone,
@@ -1953,7 +1954,7 @@  static unsigned long  __init deferred_init_pages(struct zone *zone,
 		if (!deferred_pfn_valid(pfn)) {
 			page = NULL;
 			continue;
-		} else if (!page || pageblock_aligned(pfn)) {
+		} else if (!page || IS_MAX_ORDER_ALIGNED(pfn)) {
 			page = pfn_to_page(pfn);
 		} else {
 			page++;