[v2,06/10] mm: memory: use a folio in zap_pte_range()
Commit Message
Make should_zap_page() take a folio and use a folio in
zap_pte_range(), which saves several compound_head() calls.
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
mm/memory.c | 45 +++++++++++++++++++++++++--------------------
1 file changed, 25 insertions(+), 20 deletions(-)
Comments
On Sat, Nov 04, 2023 at 11:55:18AM +0800, Kefeng Wang wrote:
> -/* Decides whether we should zap this page with the page pointer specified */
> -static inline bool should_zap_page(struct zap_details *details, struct page *page)
> +/* Decides whether we should zap this folio with the folio pointer specified */
> +static inline bool should_zap_page(struct zap_details *details, struct folio *folio)
Surely we should rename this to should_zap_folio()?
> @@ -1487,10 +1492,10 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
> * see zap_install_uffd_wp_if_needed().
> */
> WARN_ON_ONCE(!vma_is_anonymous(vma));
> - rss[mm_counter(page)]--;
> + rss[mm_counter(&folio->page)]--;
> if (is_device_private_entry(entry))
> - page_remove_rmap(page, vma, false);
> - put_page(page);
> + page_remove_rmap(&folio->page, vma, false);
> + folio_put(folio);
This is wrong. If we have a PTE-mapped THP, you'll remove the head page
N times instead of removing each of N pages.
I suspect you're going to collide with Ryan's work by doing this ...
On 2023/11/5 1:20, Matthew Wilcox wrote:
> On Sat, Nov 04, 2023 at 11:55:18AM +0800, Kefeng Wang wrote:
>> -/* Decides whether we should zap this page with the page pointer specified */
>> -static inline bool should_zap_page(struct zap_details *details, struct page *page)
>> +/* Decides whether we should zap this folio with the folio pointer specified */
>> +static inline bool should_zap_page(struct zap_details *details, struct folio *folio)
>
> Surely we should rename this to should_zap_folio()?
Will update.
>
>> @@ -1487,10 +1492,10 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
>> * see zap_install_uffd_wp_if_needed().
>> */
>> WARN_ON_ONCE(!vma_is_anonymous(vma));
>> - rss[mm_counter(page)]--;
>> + rss[mm_counter(&folio->page)]--;
>> if (is_device_private_entry(entry))
>> - page_remove_rmap(page, vma, false);
>> - put_page(page);
>> + page_remove_rmap(&folio->page, vma, false);
>> + folio_put(folio);
>
> This is wrong. If we have a PTE-mapped THP, you'll remove the head page
> N times instead of removing each of N pages.
This is a device private entry. I suppose it won't be a THP or a large
folio, judging by migrate_vma_check_page() and migrate_vma_insert_page(),
right?
>
> I suspect you're going to collide with Ryan's work by doing this ...
>
Maybe not if the above is true, at least for now.
Thanks.
On Mon, Nov 06, 2023 at 10:30:59AM +0800, Kefeng Wang wrote:
> On 2023/11/5 1:20, Matthew Wilcox wrote:
> > > - page_remove_rmap(page, vma, false);
> > > - put_page(page);
> > > + page_remove_rmap(&folio->page, vma, false);
> > > + folio_put(folio);
> >
> > This is wrong. If we have a PTE-mapped THP, you'll remove the head page
> > N times instead of removing each of N pages.
>
> This is device private entry, I suppose that it won't be a THP and large
> folio when check migrate_vma_check_page() and migrate_vma_insert_page(),
> right?
I don't want to leave that kind of booby-trap in the code. Both places
which currently call page_remove_rmap() should be left as referring to
the page, not the folio.
On 2023/11/6 22:20, Matthew Wilcox wrote:
> On Mon, Nov 06, 2023 at 10:30:59AM +0800, Kefeng Wang wrote:
>> On 2023/11/5 1:20, Matthew Wilcox wrote:
>>>> - page_remove_rmap(page, vma, false);
>>>> - put_page(page);
>>>> + page_remove_rmap(&folio->page, vma, false);
>>>> + folio_put(folio);
>>>
>>> This is wrong. If we have a PTE-mapped THP, you'll remove the head page
>>> N times instead of removing each of N pages.
>>
>> This is device private entry, I suppose that it won't be a THP and large
>> folio when check migrate_vma_check_page() and migrate_vma_insert_page(),
>> right?
>
> I don't want to leave that kind of booby-trap in the code. Both places
> which currently call page_remove_rmap() should be left as referring to
> the page, not the folio.
Sure, I will fix this, and also the page_try_dup_anon_rmap() call for the
device private entry in copy_nonpresent_pte() in patch 5.
@@ -1358,19 +1358,19 @@ static inline bool should_zap_cows(struct zap_details *details)
return details->even_cows;
}
-/* Decides whether we should zap this page with the page pointer specified */
-static inline bool should_zap_page(struct zap_details *details, struct page *page)
+/* Decides whether we should zap this folio with the folio pointer specified */
+static inline bool should_zap_page(struct zap_details *details, struct folio *folio)
{
- /* If we can make a decision without *page.. */
+ /* If we can make a decision without *folio.. */
if (should_zap_cows(details))
return true;
- /* E.g. the caller passes NULL for the case of a zero page */
- if (!page)
+ /* E.g. the caller passes NULL for the case of a zero folio */
+ if (!folio)
return true;
- /* Otherwise we should only zap non-anon pages */
- return !PageAnon(page);
+ /* Otherwise we should only zap non-anon folios */
+ return !folio_test_anon(folio);
}
static inline bool zap_drop_file_uffd_wp(struct zap_details *details)
@@ -1423,7 +1423,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
arch_enter_lazy_mmu_mode();
do {
pte_t ptent = ptep_get(pte);
- struct page *page;
+ struct folio *folio = NULL;
if (pte_none(ptent))
continue;
@@ -1433,9 +1433,13 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
if (pte_present(ptent)) {
unsigned int delay_rmap;
+ struct page *page;
page = vm_normal_page(vma, addr, ptent);
- if (unlikely(!should_zap_page(details, page)))
+ if (page)
+ folio = page_folio(page);
+
+ if (unlikely(!should_zap_page(details, folio)))
continue;
ptent = ptep_get_and_clear_full(mm, addr, pte,
tlb->fullmm);
@@ -1449,16 +1453,16 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
}
delay_rmap = 0;
- if (!PageAnon(page)) {
+ if (!folio_test_anon(folio)) {
if (pte_dirty(ptent)) {
- set_page_dirty(page);
+ folio_set_dirty(folio);
if (tlb_delay_rmap(tlb)) {
delay_rmap = 1;
force_flush = 1;
}
}
if (pte_young(ptent) && likely(vma_has_recency(vma)))
- mark_page_accessed(page);
+ folio_mark_accessed(folio);
}
rss[mm_counter(page)]--;
if (!delay_rmap) {
@@ -1477,9 +1481,10 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
entry = pte_to_swp_entry(ptent);
if (is_device_private_entry(entry) ||
is_device_exclusive_entry(entry)) {
- page = pfn_swap_entry_to_page(entry);
- if (unlikely(!should_zap_page(details, page)))
+ folio = pfn_swap_entry_to_folio(entry);
+ if (unlikely(!should_zap_page(details, folio)))
continue;
+
/*
* Both device private/exclusive mappings should only
* work with anonymous page so far, so we don't need to
@@ -1487,10 +1492,10 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
* see zap_install_uffd_wp_if_needed().
*/
WARN_ON_ONCE(!vma_is_anonymous(vma));
- rss[mm_counter(page)]--;
+ rss[mm_counter(&folio->page)]--;
if (is_device_private_entry(entry))
- page_remove_rmap(page, vma, false);
- put_page(page);
+ page_remove_rmap(&folio->page, vma, false);
+ folio_put(folio);
} else if (!non_swap_entry(entry)) {
/* Genuine swap entry, hence a private anon page */
if (!should_zap_cows(details))
@@ -1499,10 +1504,10 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
if (unlikely(!free_swap_and_cache(entry)))
print_bad_pte(vma, addr, ptent, NULL);
} else if (is_migration_entry(entry)) {
- page = pfn_swap_entry_to_page(entry);
- if (!should_zap_page(details, page))
+ folio = pfn_swap_entry_to_folio(entry);
+ if (!should_zap_page(details, folio))
continue;
- rss[mm_counter(page)]--;
+ rss[mm_counter(&folio->page)]--;
} else if (pte_marker_entry_uffd_wp(entry)) {
/*
* For anon: always drop the marker; for file: only