[v2,06/10] mm: memory: use a folio in zap_pte_range()

Message ID 20231104035522.2418660-7-wangkefeng.wang@huawei.com
State New
Headers
Series mm: convert mm counter to take a folio |

Commit Message

Kefeng Wang Nov. 4, 2023, 3:55 a.m. UTC
  Make should_zap_page() take a folio and use a folio in
zap_pte_range(), which saves several compound_head() calls.

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 mm/memory.c | 45 +++++++++++++++++++++++++--------------------
 1 file changed, 25 insertions(+), 20 deletions(-)
  

Comments

Matthew Wilcox Nov. 4, 2023, 5:20 p.m. UTC | #1
On Sat, Nov 04, 2023 at 11:55:18AM +0800, Kefeng Wang wrote:
> -/* Decides whether we should zap this page with the page pointer specified */
> -static inline bool should_zap_page(struct zap_details *details, struct page *page)
> +/* Decides whether we should zap this folio with the folio pointer specified */
> +static inline bool should_zap_page(struct zap_details *details, struct folio *folio)

Surely we should rename this to should_zap_folio()?

> @@ -1487,10 +1492,10 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
>  			 * see zap_install_uffd_wp_if_needed().
>  			 */
>  			WARN_ON_ONCE(!vma_is_anonymous(vma));
> -			rss[mm_counter(page)]--;
> +			rss[mm_counter(&folio->page)]--;
>  			if (is_device_private_entry(entry))
> -				page_remove_rmap(page, vma, false);
> -			put_page(page);
> +				page_remove_rmap(&folio->page, vma, false);
> +			folio_put(folio);

This is wrong.  If we have a PTE-mapped THP, you'll remove the head page
N times instead of removing each of N pages.

I suspect you're going to collide with Ryan's work by doing this ...
  
Kefeng Wang Nov. 6, 2023, 2:30 a.m. UTC | #2
On 2023/11/5 1:20, Matthew Wilcox wrote:
> On Sat, Nov 04, 2023 at 11:55:18AM +0800, Kefeng Wang wrote:
>> -/* Decides whether we should zap this page with the page pointer specified */
>> -static inline bool should_zap_page(struct zap_details *details, struct page *page)
>> +/* Decides whether we should zap this folio with the folio pointer specified */
>> +static inline bool should_zap_page(struct zap_details *details, struct folio *folio)
> 
> Surely we should rename this to should_zap_folio()?
Will update.
> 
>> @@ -1487,10 +1492,10 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
>>   			 * see zap_install_uffd_wp_if_needed().
>>   			 */
>>   			WARN_ON_ONCE(!vma_is_anonymous(vma));
>> -			rss[mm_counter(page)]--;
>> +			rss[mm_counter(&folio->page)]--;
>>   			if (is_device_private_entry(entry))
>> -				page_remove_rmap(page, vma, false);
>> -			put_page(page);
>> +				page_remove_rmap(&folio->page, vma, false);
>> +			folio_put(folio);
> 
> This is wrong.  If we have a PTE-mapped THP, you'll remove the head page
> N times instead of removing each of N pages.

This is a device private entry; I suppose it won't be a THP or large
folio, judging by the checks in migrate_vma_check_page() and
migrate_vma_insert_page(), right?

> 
> I suspect you're going to collide with Ryan's work by doing this ...
> 
Maybe not if the above is true, at least for now.

Thanks.
  
Matthew Wilcox Nov. 6, 2023, 2:20 p.m. UTC | #3
On Mon, Nov 06, 2023 at 10:30:59AM +0800, Kefeng Wang wrote:
> On 2023/11/5 1:20, Matthew Wilcox wrote:
> > > -				page_remove_rmap(page, vma, false);
> > > -			put_page(page);
> > > +				page_remove_rmap(&folio->page, vma, false);
> > > +			folio_put(folio);
> > 
> > This is wrong.  If we have a PTE-mapped THP, you'll remove the head page
> > N times instead of removing each of N pages.
> 
> This is device private entry, I suppose that it won't be a THP and large
> folio when check migrate_vma_check_page() and migrate_vma_insert_page(),
> right?

I don't want to leave that kind of booby-trap in the code.  Both places
which currently call page_remove_rmap() should be left as referring to
the page, not the folio.
  
Kefeng Wang Nov. 6, 2023, 3:08 p.m. UTC | #4
On 2023/11/6 22:20, Matthew Wilcox wrote:
> On Mon, Nov 06, 2023 at 10:30:59AM +0800, Kefeng Wang wrote:
>> On 2023/11/5 1:20, Matthew Wilcox wrote:
>>>> -				page_remove_rmap(page, vma, false);
>>>> -			put_page(page);
>>>> +				page_remove_rmap(&folio->page, vma, false);
>>>> +			folio_put(folio);
>>>
>>> This is wrong.  If we have a PTE-mapped THP, you'll remove the head page
>>> N times instead of removing each of N pages.
>>
>> This is device private entry, I suppose that it won't be a THP and large
>> folio when check migrate_vma_check_page() and migrate_vma_insert_page(),
>> right?
> 
> I don't want to leave that kind of booby-trap in the code.  Both places
> which currently call page_remove_rmap() should be left as referring to
> the page, not the folio.

Sure, I will fix this, also page_try_dup_anon_rmap() for device private
entry in copy_nonpresent_pte of patch5.
  

Patch

diff --git a/mm/memory.c b/mm/memory.c
index d9314dee355e..806568f9605b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1358,19 +1358,19 @@  static inline bool should_zap_cows(struct zap_details *details)
 	return details->even_cows;
 }
 
-/* Decides whether we should zap this page with the page pointer specified */
-static inline bool should_zap_page(struct zap_details *details, struct page *page)
+/* Decides whether we should zap this folio with the folio pointer specified */
+static inline bool should_zap_page(struct zap_details *details, struct folio *folio)
 {
-	/* If we can make a decision without *page.. */
+	/* If we can make a decision without *folio.. */
 	if (should_zap_cows(details))
 		return true;
 
-	/* E.g. the caller passes NULL for the case of a zero page */
-	if (!page)
+	/* E.g. the caller passes NULL for the case of a zero folio */
+	if (!folio)
 		return true;
 
-	/* Otherwise we should only zap non-anon pages */
-	return !PageAnon(page);
+	/* Otherwise we should only zap non-anon folios */
+	return !folio_test_anon(folio);
 }
 
 static inline bool zap_drop_file_uffd_wp(struct zap_details *details)
@@ -1423,7 +1423,7 @@  static unsigned long zap_pte_range(struct mmu_gather *tlb,
 	arch_enter_lazy_mmu_mode();
 	do {
 		pte_t ptent = ptep_get(pte);
-		struct page *page;
+		struct folio *folio = NULL;
 
 		if (pte_none(ptent))
 			continue;
@@ -1433,9 +1433,13 @@  static unsigned long zap_pte_range(struct mmu_gather *tlb,
 
 		if (pte_present(ptent)) {
 			unsigned int delay_rmap;
+			struct page *page;
 
 			page = vm_normal_page(vma, addr, ptent);
-			if (unlikely(!should_zap_page(details, page)))
+			if (page)
+				folio = page_folio(page);
+
+			if (unlikely(!should_zap_page(details, folio)))
 				continue;
 			ptent = ptep_get_and_clear_full(mm, addr, pte,
 							tlb->fullmm);
@@ -1449,16 +1453,16 @@  static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			}
 
 			delay_rmap = 0;
-			if (!PageAnon(page)) {
+			if (!folio_test_anon(folio)) {
 				if (pte_dirty(ptent)) {
-					set_page_dirty(page);
+					folio_set_dirty(folio);
 					if (tlb_delay_rmap(tlb)) {
 						delay_rmap = 1;
 						force_flush = 1;
 					}
 				}
 				if (pte_young(ptent) && likely(vma_has_recency(vma)))
-					mark_page_accessed(page);
+					folio_mark_accessed(folio);
 			}
 			rss[mm_counter(page)]--;
 			if (!delay_rmap) {
@@ -1477,9 +1481,10 @@  static unsigned long zap_pte_range(struct mmu_gather *tlb,
 		entry = pte_to_swp_entry(ptent);
 		if (is_device_private_entry(entry) ||
 		    is_device_exclusive_entry(entry)) {
-			page = pfn_swap_entry_to_page(entry);
-			if (unlikely(!should_zap_page(details, page)))
+			folio = pfn_swap_entry_to_folio(entry);
+			if (unlikely(!should_zap_page(details, folio)))
 				continue;
+
 			/*
 			 * Both device private/exclusive mappings should only
 			 * work with anonymous page so far, so we don't need to
@@ -1487,10 +1492,10 @@  static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			 * see zap_install_uffd_wp_if_needed().
 			 */
 			WARN_ON_ONCE(!vma_is_anonymous(vma));
-			rss[mm_counter(page)]--;
+			rss[mm_counter(&folio->page)]--;
 			if (is_device_private_entry(entry))
-				page_remove_rmap(page, vma, false);
-			put_page(page);
+				page_remove_rmap(&folio->page, vma, false);
+			folio_put(folio);
 		} else if (!non_swap_entry(entry)) {
 			/* Genuine swap entry, hence a private anon page */
 			if (!should_zap_cows(details))
@@ -1499,10 +1504,10 @@  static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			if (unlikely(!free_swap_and_cache(entry)))
 				print_bad_pte(vma, addr, ptent, NULL);
 		} else if (is_migration_entry(entry)) {
-			page = pfn_swap_entry_to_page(entry);
-			if (!should_zap_page(details, page))
+			folio = pfn_swap_entry_to_folio(entry);
+			if (!should_zap_page(details, folio))
 				continue;
-			rss[mm_counter(page)]--;
+			rss[mm_counter(&folio->page)]--;
 		} else if (pte_marker_entry_uffd_wp(entry)) {
 			/*
 			 * For anon: always drop the marker; for file: only