[mm-unstable,v4,10/10] mm/hugetlb: change hugetlb allocation functions to return a folio

Message ID 20221118222002.82588-11-sidhartha.kumar@oracle.com
State New
Headers
Series convert core hugetlb functions to folios |

Commit Message

Sidhartha Kumar Nov. 18, 2022, 10:20 p.m. UTC
  Many hugetlb allocation helper functions have now been converted to
folios; update their higher-level callers to be compatible with folios.

Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
---
 mm/hugetlb.c | 98 ++++++++++++++++++++++++----------------------------
 1 file changed, 46 insertions(+), 52 deletions(-)
  

Comments

John Hubbard Nov. 21, 2022, 2:31 a.m. UTC | #1
On 11/18/22 14:20, Sidhartha Kumar wrote:
...
> @@ -1950,7 +1949,7 @@ pgoff_t hugetlb_basepage_index(struct page *page)
>   	return (index << compound_order(page_head)) + compound_idx;
>   }
>   
> -static struct page *alloc_buddy_huge_page(struct hstate *h,
> +static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
>   		gfp_t gfp_mask, int nid, nodemask_t *nmask,
>   		nodemask_t *node_alloc_noretry)
>   {
> @@ -2009,7 +2008,7 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
>   	if (node_alloc_noretry && !page && alloc_try_hard)
>   		node_set(nid, *node_alloc_noretry);
>   
> -	return page;
> +	return page_folio(page);

1. This causes a NULL pointer crash when the user requests too many hugetlb
pages (you can probably guess how I know this, haha), for example:

     echo 50000 > /proc/sys/vm/nr_hugepages

...because page_folio() doesn't have a NULL check in there. You will want
to do something like this, on top of this current patch:

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 629bb044f063..ffb0f052bbff 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1987,11 +1987,6 @@ static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
                 page = NULL;
         }
  
-       if (page)
-               __count_vm_event(HTLB_BUDDY_PGALLOC);
-       else
-               __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
-
         /*
          * If we did not specify __GFP_RETRY_MAYFAIL, but still got a page this
          * indicates an overall state change.  Clear bit so that we resume
@@ -2008,6 +2003,12 @@ static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
         if (node_alloc_noretry && !page && alloc_try_hard)
                 node_set(nid, *node_alloc_noretry);
  
+       if (!page) {
+               __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
+               return NULL;
+       }
+
+       __count_vm_event(HTLB_BUDDY_PGALLOC);
         return page_folio(page);
  }
  

2. And also, the tests should probably be augmented to run this simple
(but easy to overlook) test.

3. And finally, the basic method of replacing page with page_folio(page)
is not sufficient, as you can see here. So I'd suggest taking a look
through your series to see if you are checking for NULL first, before
calling page_folio(page).


thanks,
  
Sidhartha Kumar Nov. 21, 2022, 3:46 p.m. UTC | #2
On 11/20/22 6:31 PM, John Hubbard wrote:
> On 11/18/22 14:20, Sidhartha Kumar wrote:
> ...
>> @@ -1950,7 +1949,7 @@ pgoff_t hugetlb_basepage_index(struct page *page)
>>       return (index << compound_order(page_head)) + compound_idx;
>>   }
>>   -static struct page *alloc_buddy_huge_page(struct hstate *h,
>> +static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
>>           gfp_t gfp_mask, int nid, nodemask_t *nmask,
>>           nodemask_t *node_alloc_noretry)
>>   {
>> @@ -2009,7 +2008,7 @@ static struct page 
>> *alloc_buddy_huge_page(struct hstate *h,
>>       if (node_alloc_noretry && !page && alloc_try_hard)
>>           node_set(nid, *node_alloc_noretry);
>>   -    return page;
>> +    return page_folio(page);
>
> 1. This causes a NULL pointer crash when the user requests too many 
> hugetlb
> pages (you can probably guess how I know this, haha), for example:
>
>     echo 50000 > /proc/sys/vm/nr_hugepages
>
> ...because page_folio() doesn't have a NULL check in there. You will want
> to do something like this, on top of this current patch:
>
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 629bb044f063..ffb0f052bbff 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1987,11 +1987,6 @@ static struct folio 
> *alloc_buddy_hugetlb_folio(struct hstate *h,
>                 page = NULL;
>         }
>
> -       if (page)
> -               __count_vm_event(HTLB_BUDDY_PGALLOC);
> -       else
> -               __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
> -
>         /*
>          * If we did not specify __GFP_RETRY_MAYFAIL, but still got a 
> page this
>          * indicates an overall state change.  Clear bit so that we 
> resume
> @@ -2008,6 +2003,12 @@ static struct folio 
> *alloc_buddy_hugetlb_folio(struct hstate *h,
>         if (node_alloc_noretry && !page && alloc_try_hard)
>                 node_set(nid, *node_alloc_noretry);
>
> +       if (!page) {
> +               __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
> +               return NULL;
> +       }
> +
> +       __count_vm_event(HTLB_BUDDY_PGALLOC);
>         return page_folio(page);
>  }
>
>
> 2. And also, the tests should probably be augmented to run this simple
> (but easy to overlook) test.
>
Sure, I can look into adding this type of test to Linux Test Project.


> 3. And finally, the basic method of replacing page with page_folio(page)
> is not sufficient, as you can see here. So I'd suggest taking a look
> through your series to see if you are checking for NULL first, before
> calling page_folio(page).
>
I will go through my series again to check for possible NULL dereferences.

Thanks for taking a look.

Sidhartha Kumar

>
> thanks,
  

Patch

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b690ea7aaa00..2de3e5ccbc3f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1378,7 +1378,7 @@  static void free_gigantic_folio(struct folio *folio, unsigned int order)
 }
 
 #ifdef CONFIG_CONTIG_ALLOC
-static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
+static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
 		int nid, nodemask_t *nodemask)
 {
 	unsigned long nr_pages = pages_per_huge_page(h);
@@ -1394,7 +1394,7 @@  static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
 			page = cma_alloc(hugetlb_cma[nid], nr_pages,
 					huge_page_order(h), true);
 			if (page)
-				return page;
+				return page_folio(page);
 		}
 
 		if (!(gfp_mask & __GFP_THISNODE)) {
@@ -1405,17 +1405,16 @@  static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
 				page = cma_alloc(hugetlb_cma[node], nr_pages,
 						huge_page_order(h), true);
 				if (page)
-					return page;
+					return page_folio(page);
 			}
 		}
 	}
 #endif
-
-	return alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask);
+	return page_folio(alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask));
 }
 
 #else /* !CONFIG_CONTIG_ALLOC */
-static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
+static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
 					int nid, nodemask_t *nodemask)
 {
 	return NULL;
@@ -1423,7 +1422,7 @@  static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
 #endif /* CONFIG_CONTIG_ALLOC */
 
 #else /* !CONFIG_ARCH_HAS_GIGANTIC_PAGE */
-static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
+static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
 					int nid, nodemask_t *nodemask)
 {
 	return NULL;
@@ -1950,7 +1949,7 @@  pgoff_t hugetlb_basepage_index(struct page *page)
 	return (index << compound_order(page_head)) + compound_idx;
 }
 
-static struct page *alloc_buddy_huge_page(struct hstate *h,
+static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
 		gfp_t gfp_mask, int nid, nodemask_t *nmask,
 		nodemask_t *node_alloc_noretry)
 {
@@ -2009,7 +2008,7 @@  static struct page *alloc_buddy_huge_page(struct hstate *h,
 	if (node_alloc_noretry && !page && alloc_try_hard)
 		node_set(nid, *node_alloc_noretry);
 
-	return page;
+	return page_folio(page);
 }
 
 /*
@@ -2019,23 +2018,21 @@  static struct page *alloc_buddy_huge_page(struct hstate *h,
  * Note that returned page is 'frozen':  ref count of head page and all tail
  * pages is zero.
  */
-static struct page *alloc_fresh_huge_page(struct hstate *h,
+static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h,
 		gfp_t gfp_mask, int nid, nodemask_t *nmask,
 		nodemask_t *node_alloc_noretry)
 {
-	struct page *page;
 	struct folio *folio;
 	bool retry = false;
 
 retry:
 	if (hstate_is_gigantic(h))
-		page = alloc_gigantic_page(h, gfp_mask, nid, nmask);
+		folio = alloc_gigantic_folio(h, gfp_mask, nid, nmask);
 	else
-		page = alloc_buddy_huge_page(h, gfp_mask,
+		folio = alloc_buddy_hugetlb_folio(h, gfp_mask,
 				nid, nmask, node_alloc_noretry);
-	if (!page)
+	if (!folio)
 		return NULL;
-	folio = page_folio(page);
 	if (hstate_is_gigantic(h)) {
 		if (!prep_compound_gigantic_folio(folio, huge_page_order(h))) {
 			/*
@@ -2052,7 +2049,7 @@  static struct page *alloc_fresh_huge_page(struct hstate *h,
 	}
 	prep_new_hugetlb_folio(h, folio, folio_nid(folio));
 
-	return page;
+	return folio;
 }
 
 /*
@@ -2062,21 +2059,21 @@  static struct page *alloc_fresh_huge_page(struct hstate *h,
 static int alloc_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
 				nodemask_t *node_alloc_noretry)
 {
-	struct page *page;
+	struct folio *folio;
 	int nr_nodes, node;
 	gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
 
 	for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) {
-		page = alloc_fresh_huge_page(h, gfp_mask, node, nodes_allowed,
-						node_alloc_noretry);
-		if (page)
+		folio = alloc_fresh_hugetlb_folio(h, gfp_mask, node,
+					nodes_allowed, node_alloc_noretry);
+		if (folio)
 			break;
 	}
 
-	if (!page)
+	if (!folio)
 		return 0;
 
-	free_huge_page(page); /* free it into the hugepage allocator */
+	free_huge_page(&folio->page); /* free it into the hugepage allocator */
 
 	return 1;
 }
@@ -2237,7 +2234,7 @@  int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
 static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
 						int nid, nodemask_t *nmask)
 {
-	struct page *page = NULL;
+	struct folio *folio = NULL;
 
 	if (hstate_is_gigantic(h))
 		return NULL;
@@ -2247,8 +2244,8 @@  static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
 		goto out_unlock;
 	spin_unlock_irq(&hugetlb_lock);
 
-	page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
-	if (!page)
+	folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
+	if (!folio)
 		return NULL;
 
 	spin_lock_irq(&hugetlb_lock);
@@ -2260,43 +2257,42 @@  static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
 	 * codeflow
 	 */
 	if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) {
-		SetHPageTemporary(page);
+		folio_set_hugetlb_temporary(folio);
 		spin_unlock_irq(&hugetlb_lock);
-		free_huge_page(page);
+		free_huge_page(&folio->page);
 		return NULL;
 	}
 
 	h->surplus_huge_pages++;
-	h->surplus_huge_pages_node[page_to_nid(page)]++;
+	h->surplus_huge_pages_node[folio_nid(folio)]++;
 
 out_unlock:
 	spin_unlock_irq(&hugetlb_lock);
 
-	return page;
+	return &folio->page;
 }
 
 static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
 				     int nid, nodemask_t *nmask)
 {
-	struct page *page;
+	struct folio *folio;
 
 	if (hstate_is_gigantic(h))
 		return NULL;
 
-	page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
-	if (!page)
+	folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
+	if (!folio)
 		return NULL;
 
 	/* fresh huge pages are frozen */
-	set_page_refcounted(page);
-
+	folio_ref_unfreeze(folio, 1);
 	/*
 	 * We do not account these pages as surplus because they are only
 	 * temporary and will be released properly on the last reference
 	 */
-	SetHPageTemporary(page);
+	folio_set_hugetlb_temporary(folio);
 
-	return page;
+	return &folio->page;
 }
 
 /*
@@ -2745,19 +2741,18 @@  void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
 }
 
 /*
- * alloc_and_dissolve_huge_page - Allocate a new page and dissolve the old one
+ * alloc_and_dissolve_hugetlb_folio - Allocate a new folio and dissolve
+ * the old one
  * @h: struct hstate old page belongs to
  * @old_page: Old page to dissolve
  * @list: List to isolate the page in case we need to
  * Returns 0 on success, otherwise negated error.
  */
-static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page,
-					struct list_head *list)
+static int alloc_and_dissolve_hugetlb_folio(struct hstate *h,
+			struct folio *old_folio, struct list_head *list)
 {
 	gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
-	struct folio *old_folio = page_folio(old_page);
 	int nid = folio_nid(old_folio);
-	struct page *new_page;
 	struct folio *new_folio;
 	int ret = 0;
 
@@ -2768,26 +2763,25 @@  static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page,
 	 * the pool.  This simplifies and let us do most of the processing
 	 * under the lock.
 	 */
-	new_page = alloc_buddy_huge_page(h, gfp_mask, nid, NULL, NULL);
-	if (!new_page)
+	new_folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, NULL, NULL);
+	if (!new_folio)
 		return -ENOMEM;
-	new_folio = page_folio(new_page);
 	__prep_new_hugetlb_folio(h, new_folio);
 
 retry:
 	spin_lock_irq(&hugetlb_lock);
 	if (!folio_test_hugetlb(old_folio)) {
 		/*
-		 * Freed from under us. Drop new_page too.
+		 * Freed from under us. Drop new_folio too.
 		 */
 		goto free_new;
 	} else if (folio_ref_count(old_folio)) {
 		/*
-		 * Someone has grabbed the page, try to isolate it here.
+		 * Someone has grabbed the folio, try to isolate it here.
 		 * Fail with -EBUSY if not possible.
 		 */
 		spin_unlock_irq(&hugetlb_lock);
-		ret = isolate_hugetlb(old_page, list);
+		ret = isolate_hugetlb(&old_folio->page, list);
 		spin_lock_irq(&hugetlb_lock);
 		goto free_new;
 	} else if (!folio_test_hugetlb_freed(old_folio)) {
@@ -2865,7 +2859,7 @@  int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list)
 	if (folio_ref_count(folio) && !isolate_hugetlb(&folio->page, list))
 		ret = 0;
 	else if (!folio_ref_count(folio))
-		ret = alloc_and_dissolve_huge_page(h, &folio->page, list);
+		ret = alloc_and_dissolve_hugetlb_folio(h, folio, list);
 
 	return ret;
 }
@@ -3083,14 +3077,14 @@  static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid)
 			if (!alloc_bootmem_huge_page(h, nid))
 				break;
 		} else {
-			struct page *page;
+			struct folio *folio;
 			gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
 
-			page = alloc_fresh_huge_page(h, gfp_mask, nid,
+			folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid,
 					&node_states[N_MEMORY], NULL);
-			if (!page)
+			if (!folio)
 				break;
-			free_huge_page(page); /* free it into the hugepage allocator */
+			free_huge_page(&folio->page); /* free it into the hugepage allocator */
 		}
 		cond_resched();
 	}