On Tue, Jan 03, 2023 at 01:13:35PM -0600, Sidhartha Kumar wrote:
> +static struct folio *dequeue_hugetlb_folio_node_exact(struct hstate *h,
> + int nid)
> {
> struct page *page;
> + struct folio *folio;
> bool pin = !!(current->flags & PF_MEMALLOC_PIN);
>
> lockdep_assert_held(&hugetlb_lock);
> list_for_each_entry(page, &h->hugepage_freelists[nid], lru) {
> - if (pin && !is_longterm_pinnable_page(page))
> + folio = page_folio(page);
I'd argue that you can pull folios directly off the hugepage_freelists.
Since they're attached through the 'lru', you know they're not tail
pages, because lru.next aliases with compound_head: a tail page has
bit 0 set in that word, which a valid (aligned) list pointer never does.
The rest looks good.
On 01/03/23 21:00, Matthew Wilcox wrote:
> On Tue, Jan 03, 2023 at 01:13:35PM -0600, Sidhartha Kumar wrote:
> > +static struct folio *dequeue_hugetlb_folio_node_exact(struct hstate *h,
> > + int nid)
> > {
> > struct page *page;
> > + struct folio *folio;
> > bool pin = !!(current->flags & PF_MEMALLOC_PIN);
> >
> > lockdep_assert_held(&hugetlb_lock);
> > list_for_each_entry(page, &h->hugepage_freelists[nid], lru) {
> > - if (pin && !is_longterm_pinnable_page(page))
> > + folio = page_folio(page);
>
> I'd argue that you can pull folios directly off the hugepage_freelists.
> Since they're attached through the 'lru', you know they're not tail
> pages, because lru.next aliases with compound_head.
Yes, then we can get rid of the local variable *page.
A quick grep shows that __mem_cgroup_uncharge_list() is the only routine
that iterates folios directly off a list through 'lru' today.
@@ -1140,32 +1140,36 @@ static void enqueue_hugetlb_folio(struct hstate *h, struct folio *folio)
folio_set_hugetlb_freed(folio);
}
-static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
+static struct folio *dequeue_hugetlb_folio_node_exact(struct hstate *h,
+ int nid)
{
struct page *page;
+ struct folio *folio;
bool pin = !!(current->flags & PF_MEMALLOC_PIN);
lockdep_assert_held(&hugetlb_lock);
list_for_each_entry(page, &h->hugepage_freelists[nid], lru) {
- if (pin && !is_longterm_pinnable_page(page))
+ folio = page_folio(page);
+
+ if (pin && !folio_is_longterm_pinnable(folio))
continue;
- if (PageHWPoison(page))
+ if (folio_test_hwpoison(folio))
continue;
- list_move(&page->lru, &h->hugepage_activelist);
- set_page_refcounted(page);
- ClearHPageFreed(page);
+ list_move(&folio->lru, &h->hugepage_activelist);
+ folio_ref_unfreeze(folio, 1);
+ folio_clear_hugetlb_freed(folio);
h->free_huge_pages--;
h->free_huge_pages_node[nid]--;
- return page;
+ return folio;
}
return NULL;
}
-static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask, int nid,
- nodemask_t *nmask)
+static struct folio *dequeue_hugetlb_folio_nodemask(struct hstate *h, gfp_t gfp_mask,
+ int nid, nodemask_t *nmask)
{
unsigned int cpuset_mems_cookie;
struct zonelist *zonelist;
@@ -1178,7 +1182,7 @@ static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask,
retry_cpuset:
cpuset_mems_cookie = read_mems_allowed_begin();
for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp_mask), nmask) {
- struct page *page;
+ struct folio *folio;
if (!cpuset_zone_allowed(zone, gfp_mask))
continue;
@@ -1190,9 +1194,9 @@ static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask,
continue;
node = zone_to_nid(zone);
- page = dequeue_huge_page_node_exact(h, node);
- if (page)
- return page;
+ folio = dequeue_hugetlb_folio_node_exact(h, node);
+ if (folio)
+ return folio;
}
if (unlikely(read_mems_allowed_retry(cpuset_mems_cookie)))
goto retry_cpuset;
@@ -1210,7 +1214,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
unsigned long address, int avoid_reserve,
long chg)
{
- struct page *page = NULL;
+ struct folio *folio = NULL;
struct mempolicy *mpol;
gfp_t gfp_mask;
nodemask_t *nodemask;
@@ -1232,22 +1236,24 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
if (mpol_is_preferred_many(mpol)) {
- page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
+ folio = dequeue_hugetlb_folio_nodemask(h, gfp_mask,
+ nid, nodemask);
/* Fallback to all nodes if page==NULL */
nodemask = NULL;
}
- if (!page)
- page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
+ if (!folio)
+ folio = dequeue_hugetlb_folio_nodemask(h, gfp_mask,
+ nid, nodemask);
- if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
- SetHPageRestoreReserve(page);
+ if (folio && !avoid_reserve && vma_has_reserves(vma, chg)) {
+ folio_set_hugetlb_restore_reserve(folio);
h->resv_huge_pages--;
}
mpol_cond_put(mpol);
- return page;
+ return &folio->page;
err:
return NULL;
@@ -2331,12 +2337,13 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
{
spin_lock_irq(&hugetlb_lock);
if (available_huge_pages(h)) {
- struct page *page;
+ struct folio *folio;
- page = dequeue_huge_page_nodemask(h, gfp_mask, preferred_nid, nmask);
- if (page) {
+ folio = dequeue_hugetlb_folio_nodemask(h, gfp_mask,
+ preferred_nid, nmask);
+ if (folio) {
spin_unlock_irq(&hugetlb_lock);
- return page;
+ return &folio->page;
}
}
spin_unlock_irq(&hugetlb_lock);