[v1,1/9] mm/memory: factor out zapping of present pte into zap_present_pte()

Message ID 20240129143221.263763-2-david@redhat.com
State New
Headers
Series mm/memory: optimize unmap/zap with PTE-mapped THP |

Commit Message

David Hildenbrand Jan. 29, 2024, 2:32 p.m. UTC
  Let's prepare for further changes by factoring out processing of present
PTEs.

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 mm/memory.c | 92 ++++++++++++++++++++++++++++++-----------------------
 1 file changed, 52 insertions(+), 40 deletions(-)
  

Comments

David Hildenbrand Jan. 30, 2024, 8:41 a.m. UTC | #1
On 30.01.24 09:13, Ryan Roberts wrote:
> On 29/01/2024 14:32, David Hildenbrand wrote:
>> Let's prepare for further changes by factoring out processing of present
>> PTEs.
>>
>> Signed-off-by: David Hildenbrand <david@redhat.com>
>> ---
>>   mm/memory.c | 92 ++++++++++++++++++++++++++++++-----------------------
>>   1 file changed, 52 insertions(+), 40 deletions(-)
>>
>> diff --git a/mm/memory.c b/mm/memory.c
>> index b05fd28dbce1..50a6c79c78fc 100644
>> --- a/mm/memory.c
>> +++ b/mm/memory.c
>> @@ -1532,13 +1532,61 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct *vma,
>>   	pte_install_uffd_wp_if_needed(vma, addr, pte, pteval);
>>   }
>>   
>> +static inline void zap_present_pte(struct mmu_gather *tlb,
>> +		struct vm_area_struct *vma, pte_t *pte, pte_t ptent,
>> +		unsigned long addr, struct zap_details *details,
>> +		int *rss, bool *force_flush, bool *force_break)
>> +{
>> +	struct mm_struct *mm = tlb->mm;
>> +	bool delay_rmap = false;
>> +	struct folio *folio;
> 
> You need to init this to NULL; otherwise it's a random value when calling
> should_zap_folio() if vm_normal_page() returns NULL.

Right, and we can stop setting it to NULL in the original function. 
Patch #2 changes these checks, which is why it's only a problem in this 
patch.

Will fix, thanks!
  
David Hildenbrand Jan. 30, 2024, 8:49 a.m. UTC | #2
On 30.01.24 09:46, Ryan Roberts wrote:
> On 30/01/2024 08:41, David Hildenbrand wrote:
>> On 30.01.24 09:13, Ryan Roberts wrote:
>>> On 29/01/2024 14:32, David Hildenbrand wrote:
>>>> Let's prepare for further changes by factoring out processing of present
>>>> PTEs.
>>>>
>>>> Signed-off-by: David Hildenbrand <david@redhat.com>
>>>> ---
>>>>    mm/memory.c | 92 ++++++++++++++++++++++++++++++-----------------------
>>>>    1 file changed, 52 insertions(+), 40 deletions(-)
>>>>
>>>> diff --git a/mm/memory.c b/mm/memory.c
>>>> index b05fd28dbce1..50a6c79c78fc 100644
>>>> --- a/mm/memory.c
>>>> +++ b/mm/memory.c
>>>> @@ -1532,13 +1532,61 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct
>>>> *vma,
>>>>        pte_install_uffd_wp_if_needed(vma, addr, pte, pteval);
>>>>    }
>>>>    +static inline void zap_present_pte(struct mmu_gather *tlb,
>>>> +        struct vm_area_struct *vma, pte_t *pte, pte_t ptent,
>>>> +        unsigned long addr, struct zap_details *details,
>>>> +        int *rss, bool *force_flush, bool *force_break)
>>>> +{
>>>> +    struct mm_struct *mm = tlb->mm;
>>>> +    bool delay_rmap = false;
>>>> +    struct folio *folio;
>>>
>>> You need to init this to NULL; otherwise it's a random value when calling
>>> should_zap_folio() if vm_normal_page() returns NULL.
>>
>> Right, and we can stop setting it to NULL in the original function. Patch #2
>> changes these checks, which is why it's only a problem in this patch.
> 
> Yeah I only noticed that after sending out this reply and moving to the next
> patch. Still worth fixing this intermediate state I think.

Absolutely, I didn't do patch-by-patch compilation yet (I suspect the 
compiler would complain).
  

Patch

diff --git a/mm/memory.c b/mm/memory.c
index b05fd28dbce1..50a6c79c78fc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1532,13 +1532,61 @@  zap_install_uffd_wp_if_needed(struct vm_area_struct *vma,
 	pte_install_uffd_wp_if_needed(vma, addr, pte, pteval);
 }
 
+static inline void zap_present_pte(struct mmu_gather *tlb,
+		struct vm_area_struct *vma, pte_t *pte, pte_t ptent,
+		unsigned long addr, struct zap_details *details,
+		int *rss, bool *force_flush, bool *force_break)
+{
+	struct mm_struct *mm = tlb->mm;
+	bool delay_rmap = false;
+	struct folio *folio;
+	struct page *page;
+
+	page = vm_normal_page(vma, addr, ptent);
+	if (page)
+		folio = page_folio(page);
+
+	if (unlikely(!should_zap_folio(details, folio)))
+		return;
+	ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm);
+	arch_check_zapped_pte(vma, ptent);
+	tlb_remove_tlb_entry(tlb, pte, addr);
+	zap_install_uffd_wp_if_needed(vma, addr, pte, details, ptent);
+	if (unlikely(!page)) {
+		ksm_might_unmap_zero_page(mm, ptent);
+		return;
+	}
+
+	if (!folio_test_anon(folio)) {
+		if (pte_dirty(ptent)) {
+			folio_mark_dirty(folio);
+			if (tlb_delay_rmap(tlb)) {
+				delay_rmap = true;
+				*force_flush = true;
+			}
+		}
+		if (pte_young(ptent) && likely(vma_has_recency(vma)))
+			folio_mark_accessed(folio);
+	}
+	rss[mm_counter(folio)]--;
+	if (!delay_rmap) {
+		folio_remove_rmap_pte(folio, page, vma);
+		if (unlikely(page_mapcount(page) < 0))
+			print_bad_pte(vma, addr, ptent, page);
+	}
+	if (unlikely(__tlb_remove_page(tlb, page, delay_rmap))) {
+		*force_flush = true;
+		*force_break = true;
+	}
+}
+
 static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pmd_t *pmd,
 				unsigned long addr, unsigned long end,
 				struct zap_details *details)
 {
+	bool force_flush = false, force_break = false;
 	struct mm_struct *mm = tlb->mm;
-	int force_flush = 0;
 	int rss[NR_MM_COUNTERS];
 	spinlock_t *ptl;
 	pte_t *start_pte;
@@ -1565,45 +1613,9 @@  static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			break;
 
 		if (pte_present(ptent)) {
-			unsigned int delay_rmap;
-
-			page = vm_normal_page(vma, addr, ptent);
-			if (page)
-				folio = page_folio(page);
-
-			if (unlikely(!should_zap_folio(details, folio)))
-				continue;
-			ptent = ptep_get_and_clear_full(mm, addr, pte,
-							tlb->fullmm);
-			arch_check_zapped_pte(vma, ptent);
-			tlb_remove_tlb_entry(tlb, pte, addr);
-			zap_install_uffd_wp_if_needed(vma, addr, pte, details,
-						      ptent);
-			if (unlikely(!page)) {
-				ksm_might_unmap_zero_page(mm, ptent);
-				continue;
-			}
-
-			delay_rmap = 0;
-			if (!folio_test_anon(folio)) {
-				if (pte_dirty(ptent)) {
-					folio_mark_dirty(folio);
-					if (tlb_delay_rmap(tlb)) {
-						delay_rmap = 1;
-						force_flush = 1;
-					}
-				}
-				if (pte_young(ptent) && likely(vma_has_recency(vma)))
-					folio_mark_accessed(folio);
-			}
-			rss[mm_counter(folio)]--;
-			if (!delay_rmap) {
-				folio_remove_rmap_pte(folio, page, vma);
-				if (unlikely(page_mapcount(page) < 0))
-					print_bad_pte(vma, addr, ptent, page);
-			}
-			if (unlikely(__tlb_remove_page(tlb, page, delay_rmap))) {
-				force_flush = 1;
+			zap_present_pte(tlb, vma, pte, ptent, addr, details,
+					rss, &force_flush, &force_break);
+			if (unlikely(force_break)) {
 				addr += PAGE_SIZE;
 				break;
 			}