[-next,1/7] mm: huge_memory: make __do_huge_pmd_anonymous_page() to take a folio

Message ID 20230112083006.163393-2-wangkefeng.wang@huawei.com
State New
Headers
Series mm: remove cgroup_throttle_swaprate() completely |

Commit Message

Kefeng Wang Jan. 12, 2023, 8:30 a.m. UTC
  Let's __do_huge_pmd_anonymous_page() take a folio and convert related
functions to use folios.

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 mm/huge_memory.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)
  

Comments

Matthew Wilcox Jan. 13, 2023, 2:25 p.m. UTC | #1
On Thu, Jan 12, 2023 at 04:30:00PM +0800, Kefeng Wang wrote:
> Let's __do_huge_pmd_anonymous_page() take a folio and convert related
> functions to use folios.

No, this is actively wrong!  Andrew, please drop this patch.

If we want to support folio sizes larger than PMD size (and I think we
do), we need to be able to specify precisely which page in the folio
is to be stored at this PTE.  The *interface* must remain struct page.
We can convert from page to folio within the function, but we *MUST NOT*
go the other way.

>  static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
> -			struct page *page, gfp_t gfp)
> +			struct folio *folio, gfp_t gfp)
>  {
>  	struct vm_area_struct *vma = vmf->vma;
> +	struct page *page = &folio->page;

... ie this is bad and wrong.

> @@ -834,7 +835,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
>  		count_vm_event(THP_FAULT_FALLBACK);
>  		return VM_FAULT_FALLBACK;
>  	}
> -	return __do_huge_pmd_anonymous_page(vmf, &folio->page, gfp);
> +	return __do_huge_pmd_anonymous_page(vmf, folio, gfp);
>  }
>  
>  static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,

A reasonable person might ask "But Matthew, you allocated a folio here,
then you're converting back to a struct page to call
__do_huge_pmd_anonymous_page() so isn't this a sensible patch?"

And I would say "still no".  This is a question of interfaces, and
even though __do_huge_pmd_anonymous_page() is static and has precisely
one caller today that always allocates a folio of precisely PMD size,
I suspect it will need to be more visible in the future and the
conversion of the interface from page to folio misleads people.
  
Kefeng Wang Jan. 16, 2023, 11:09 a.m. UTC | #2
On 2023/1/13 22:25, Matthew Wilcox wrote:
> On Thu, Jan 12, 2023 at 04:30:00PM +0800, Kefeng Wang wrote:
>> Let's __do_huge_pmd_anonymous_page() take a folio and convert related
>> functions to use folios.
> 
> No, this is actively wrong!  Andrew, please drop this patch.
> 
> If we want to support folio sizes larger than PMD size (and I think we
> do), we need to be able to specify precisely which page in the folio
> is to be stored at this PTE.  The *interface* must remain struct page.
> We can convert from page to folio within the function, but we *MUST NOT*
> go the other way.

Got it,

> 
>>   static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
>> -			struct page *page, gfp_t gfp)
>> +			struct folio *folio, gfp_t gfp)
>>   {
>>   	struct vm_area_struct *vma = vmf->vma;
>> +	struct page *page = &folio->page;
> 
> ... ie this is bad and wrong.
> 
>> @@ -834,7 +835,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
>>   		count_vm_event(THP_FAULT_FALLBACK);
>>   		return VM_FAULT_FALLBACK;
>>   	}
>> -	return __do_huge_pmd_anonymous_page(vmf, &folio->page, gfp);
>> +	return __do_huge_pmd_anonymous_page(vmf, folio, gfp);
>>   }
>>   
>>   static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
> 
> A reasonable person might ask "But Matthew, you allocated a folio here,
> then you're converting back to a struct page to call
> __do_huge_pmd_anonymous_page() so isn't this a sensible patch?"

and this is why I change the parameter from page to folio(no need to go 
back and forth between page and folio),
> 
> And I would say "still no".  This is a question of interfaces, and
> even though __do_huge_pmd_anonymous_page() is static and has precisely
> one caller today that always allocates a folio of precisely PMD size,
> I suspect it will need to be more visible in the future and the
> conversion of the interface from page to folio misleads people.
ok, will keep page for __do_huge_pmd_anonymous_page().
>
  

Patch

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index c13b1f67d14e..cb23b24e2eb8 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -650,22 +650,23 @@  unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
 EXPORT_SYMBOL_GPL(thp_get_unmapped_area);
 
 static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
-			struct page *page, gfp_t gfp)
+			struct folio *folio, gfp_t gfp)
 {
 	struct vm_area_struct *vma = vmf->vma;
+	struct page *page = &folio->page;
 	pgtable_t pgtable;
 	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
 	vm_fault_t ret = 0;
 
-	VM_BUG_ON_PAGE(!PageCompound(page), page);
+	VM_BUG_ON_FOLIO(!folio_test_large(folio), folio);
 
-	if (mem_cgroup_charge(page_folio(page), vma->vm_mm, gfp)) {
-		put_page(page);
+	if (mem_cgroup_charge(folio, vma->vm_mm, gfp)) {
+		folio_put(folio);
 		count_vm_event(THP_FAULT_FALLBACK);
 		count_vm_event(THP_FAULT_FALLBACK_CHARGE);
 		return VM_FAULT_FALLBACK;
 	}
-	cgroup_throttle_swaprate(page, gfp);
+	folio_throttle_swaprate(folio, gfp);
 
 	pgtable = pte_alloc_one(vma->vm_mm);
 	if (unlikely(!pgtable)) {
@@ -675,11 +676,11 @@  static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 
 	clear_huge_page(page, vmf->address, HPAGE_PMD_NR);
 	/*
-	 * The memory barrier inside __SetPageUptodate makes sure that
+	 * The memory barrier inside __folio_mark_uptodate makes sure that
 	 * clear_huge_page writes become visible before the set_pmd_at()
 	 * write.
 	 */
-	__SetPageUptodate(page);
+	__folio_mark_uptodate(folio);
 
 	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
 	if (unlikely(!pmd_none(*vmf->pmd))) {
@@ -694,7 +695,7 @@  static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 		/* Deliver the page fault to userland */
 		if (userfaultfd_missing(vma)) {
 			spin_unlock(vmf->ptl);
-			put_page(page);
+			folio_put(folio);
 			pte_free(vma->vm_mm, pgtable);
 			ret = handle_userfault(vmf, VM_UFFD_MISSING);
 			VM_BUG_ON(ret & VM_FAULT_FALLBACK);
@@ -704,7 +705,7 @@  static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 		entry = mk_huge_pmd(page, vma->vm_page_prot);
 		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 		page_add_new_anon_rmap(page, vma, haddr);
-		lru_cache_add_inactive_or_unevictable(page, vma);
+		folio_add_lru_vma(folio, vma);
 		pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
 		set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
 		update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
@@ -721,7 +722,7 @@  static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 release:
 	if (pgtable)
 		pte_free(vma->vm_mm, pgtable);
-	put_page(page);
+	folio_put(folio);
 	return ret;
 
 }
@@ -834,7 +835,7 @@  vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
 	}
-	return __do_huge_pmd_anonymous_page(vmf, &folio->page, gfp);
+	return __do_huge_pmd_anonymous_page(vmf, folio, gfp);
 }
 
 static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,