[RFC,23/39] mm/rmap: introduce folio_remove_rmap_[pte|ptes|pmd]()
Commit Message
Let's mimic what we did with folio_add_file_rmap_*() and
folio_add_anon_rmap_*() so we can similarly replace page_remove_rmap()
next.
Make the compiler always special-case on the granularity by using
__always_inline.
We're adding folio_remove_rmap_ptes() handling right away, as we want to
use that soon for batching rmap operations when unmapping PTE-mapped
large folios.
Signed-off-by: David Hildenbrand <david@redhat.com>
---
include/linux/rmap.h |  6 ++++
mm/rmap.c            | 76 ++++++++++++++++++++++++++++++++++++--------
2 files changed, 68 insertions(+), 14 deletions(-)
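
To illustrate the intended use (not part of this patch; the commit message notes the caller conversions for batched unmapping come later in the series), a caller that just cleared nr contiguous PTEs mapping one large folio under the page table lock could drop all of the corresponding rmaps in a single call. The example_* helpers below are made up for illustration only:

	/*
	 * Hypothetical sketch: the PTEs mapping [page, page + nr) of @folio
	 * were already cleared under the page table lock.
	 */
	static void example_remove_ptes(struct folio *folio, struct page *page,
			unsigned int nr, struct vm_area_struct *vma)
	{
		folio_remove_rmap_ptes(folio, page, nr, vma);
	}

	/* Single-PTE and PMD mappings keep dedicated entry points. */
	static void example_remove_single(struct folio *folio, struct page *page,
			struct vm_area_struct *vma, bool pmd_mapped)
	{
		if (pmd_mapped)
			folio_remove_rmap_pmd(folio, page, vma);
		else
			folio_remove_rmap_pte(folio, page, vma);
	}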
Comments
On 04/12/2023 14:21, David Hildenbrand wrote:
> Let's mimic what we did with folio_add_file_rmap_*() and
> folio_add_anon_rmap_*() so we can similarly replace page_remove_rmap()
> next.
>
> Make the compiler always special-case on the granularity by using
> __always_inline.
>
> We're adding folio_remove_rmap_ptes() handling right away, as we want to
> use that soon for batching rmap operations when unmapping PTE-mapped
> large folios.
>
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
> include/linux/rmap.h | 6 ++++
> mm/rmap.c | 76 ++++++++++++++++++++++++++++++++++++--------
> 2 files changed, 68 insertions(+), 14 deletions(-)
>
> diff --git a/include/linux/rmap.h b/include/linux/rmap.h
> index 017b216915f19..dd4ffb1d8ae04 100644
> --- a/include/linux/rmap.h
> +++ b/include/linux/rmap.h
> @@ -241,6 +241,12 @@ void folio_add_file_rmap_pmd(struct folio *, struct page *,
> struct vm_area_struct *);
> void page_remove_rmap(struct page *, struct vm_area_struct *,
> bool compound);
> +void folio_remove_rmap_ptes(struct folio *, struct page *, unsigned int nr,
> + struct vm_area_struct *);
> +#define folio_remove_rmap_pte(folio, page, vma) \
> + folio_remove_rmap_ptes(folio, page, 1, vma)
> +void folio_remove_rmap_pmd(struct folio *, struct page *,
> + struct vm_area_struct *);
>
> void hugetlb_add_anon_rmap(struct folio *, struct vm_area_struct *,
> unsigned long address, rmap_t flags);
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 3587225055c5e..50b6909157ac1 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -1463,25 +1463,36 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
> bool compound)
> {
> struct folio *folio = page_folio(page);
> +
> + if (likely(!compound))
> + folio_remove_rmap_pte(folio, page, vma);
> + else
> + folio_remove_rmap_pmd(folio, page, vma);
> +}
> +
> +static __always_inline void __folio_remove_rmap(struct folio *folio,
> + struct page *page, unsigned int nr_pages,
> + struct vm_area_struct *vma, enum rmap_mode mode)
> +{
> atomic_t *mapped = &folio->_nr_pages_mapped;
> - int nr = 0, nr_pmdmapped = 0;
> - bool last;
> + int last, nr = 0, nr_pmdmapped = 0;
nit: you're being inconsistent across the functions with signed vs unsigned for
page counts (e.g. nr, nr_pmdmapped) - see __folio_add_rmap(),
__folio_add_file_rmap(), __folio_add_anon_rmap().
I suggest pick one and stick to it. Personally I'd go with signed int (since
that's what all the counters in struct folio that we are manipulating are,
underneath the atomic_t) then check that nr_pages > 0 in
__folio_rmap_sanity_checks().
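Just as a sketch of the kind of check I mean (assuming it lives alongside the existing checks in __folio_rmap_sanity_checks()):

	static inline void __folio_rmap_sanity_checks(struct folio *folio,
			struct page *page, int nr_pages, enum rmap_mode mode)
	{
		/* ... existing checks ... */
		VM_WARN_ON_FOLIO(nr_pages <= 0, folio);
	}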
> enum node_stat_item idx;
>
> - VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
> - VM_BUG_ON_PAGE(compound && !PageHead(page), page);
> + __folio_rmap_sanity_checks(folio, page, nr_pages, mode);
>
> /* Is page being unmapped by PTE? Is this its last map to be removed? */
> - if (likely(!compound)) {
> - last = atomic_add_negative(-1, &page->_mapcount);
> - nr = last;
> - if (last && folio_test_large(folio)) {
> - nr = atomic_dec_return_relaxed(mapped);
> - nr = (nr < COMPOUND_MAPPED);
> - }
> - } else if (folio_test_pmd_mappable(folio)) {
> - /* That test is redundant: it's for safety or to optimize out */
> + if (likely(mode == RMAP_MODE_PTE)) {
> + do {
> + last = atomic_add_negative(-1, &page->_mapcount);
> + if (last && folio_test_large(folio)) {
> + last = atomic_dec_return_relaxed(mapped);
> + last = (last < COMPOUND_MAPPED);
> + }
>
> + if (last)
> + nr++;
> + } while (page++, --nr_pages > 0);
> + } else if (mode == RMAP_MODE_PMD) {
> last = atomic_add_negative(-1, &folio->_entire_mapcount);
> if (last) {
> nr = atomic_sub_return_relaxed(COMPOUND_MAPPED, mapped);
> @@ -1517,7 +1528,7 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
> * is still mapped.
> */
> if (folio_test_pmd_mappable(folio) && folio_test_anon(folio))
folio_test_pmd_mappable() -> folio_test_large()
Since you're converting this to support batch PTE removal, it might as well also
support smaller-than-pmd too?
I currently have a patch to do this same change in the multi-size THP series.
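i.e. roughly this (sketch only, combining the suggested test with the condition this patch already uses):

	if (folio_test_large(folio) && folio_test_anon(folio))
		if (mode == RMAP_MODE_PTE || nr < nr_pmdmapped)
			deferred_split_folio(folio);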
> - if (!compound || nr < nr_pmdmapped)
> + if (mode == RMAP_MODE_PTE || nr < nr_pmdmapped)
> deferred_split_folio(folio);
> }
>
> @@ -1532,6 +1543,43 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
> munlock_vma_folio(folio, vma);
> }
>
> +/**
> + * folio_remove_rmap_ptes - remove PTE mappings from a page range of a folio
> + * @folio: The folio to remove the mappings from
> + * @page: The first page to remove
> + * @nr_pages: The number of pages that will be removed from the mapping
> + * @vma: The vm area from which the mappings are removed
> + *
> + * The page range of the folio is defined by [page, page + nr_pages)
> + *
> + * The caller needs to hold the page table lock.
> + */
> +void folio_remove_rmap_ptes(struct folio *folio, struct page *page,
> + unsigned int nr_pages, struct vm_area_struct *vma)
> +{
> + __folio_remove_rmap(folio, page, nr_pages, vma, RMAP_MODE_PTE);
> +}
> +
> +/**
> + * folio_remove_rmap_pmd - remove a PMD mapping from a page range of a folio
> + * @folio: The folio to remove the mapping from
> + * @page: The first page to remove
> + * @vma: The vm area from which the mapping is removed
> + *
> + * The page range of the folio is defined by [page, page + HPAGE_PMD_NR)
> + *
> + * The caller needs to hold the page table lock.
> + */
> +void folio_remove_rmap_pmd(struct folio *folio, struct page *page,
> + struct vm_area_struct *vma)
> +{
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> + __folio_remove_rmap(folio, page, HPAGE_PMD_NR, vma, RMAP_MODE_PMD);
> +#else
> + WARN_ON_ONCE(true);
> +#endif
> +}
> +
> /*
> * @arg: enum ttu_flags will be passed to this argument
> */
>> +static __always_inline void __folio_remove_rmap(struct folio *folio,
>> + struct page *page, unsigned int nr_pages,
>> + struct vm_area_struct *vma, enum rmap_mode mode)
>> +{
>> atomic_t *mapped = &folio->_nr_pages_mapped;
>> - int nr = 0, nr_pmdmapped = 0;
>> - bool last;
>> + int last, nr = 0, nr_pmdmapped = 0;
>
> nit: you're being inconsistent across the functions with signed vs unsigned for
> page counts (e.g. nr, nr_pmdmapped) - see __folio_add_rmap(),
> __folio_add_file_rmap(), __folio_add_anon_rmap().
>
Definitely.
> I suggest pick one and stick to it. Personally I'd go with signed int (since
> that's what all the counters in struct folio that we are manipulating are,
> underneath the atomic_t) then check that nr_pages > 0 in
> __folio_rmap_sanity_checks().
Can do, but note that the counters are signed to detect underflows. It
doesn't make sense here to pass a negative number.
>
>> enum node_stat_item idx;
>>
>> - VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
>> - VM_BUG_ON_PAGE(compound && !PageHead(page), page);
>> + __folio_rmap_sanity_checks(folio, page, nr_pages, mode);
>>
>> /* Is page being unmapped by PTE? Is this its last map to be removed? */
>> - if (likely(!compound)) {
>> - last = atomic_add_negative(-1, &page->_mapcount);
>> - nr = last;
>> - if (last && folio_test_large(folio)) {
>> - nr = atomic_dec_return_relaxed(mapped);
>> - nr = (nr < COMPOUND_MAPPED);
>> - }
>> - } else if (folio_test_pmd_mappable(folio)) {
>> - /* That test is redundant: it's for safety or to optimize out */
>> + if (likely(mode == RMAP_MODE_PTE)) {
>> + do {
>> + last = atomic_add_negative(-1, &page->_mapcount);
>> + if (last && folio_test_large(folio)) {
>> + last = atomic_dec_return_relaxed(mapped);
>> + last = (last < COMPOUND_MAPPED);
>> + }
>>
>> + if (last)
>> + nr++;
>> + } while (page++, --nr_pages > 0);
>> + } else if (mode == RMAP_MODE_PMD) {
>> last = atomic_add_negative(-1, &folio->_entire_mapcount);
>> if (last) {
>> nr = atomic_sub_return_relaxed(COMPOUND_MAPPED, mapped);
>> @@ -1517,7 +1528,7 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
>> * is still mapped.
>> */
>> if (folio_test_pmd_mappable(folio) && folio_test_anon(folio))
>
> folio_test_pmd_mappable() -> folio_test_large()
>
> Since you're converting this to support batch PTE removal, it might as well also
> support smaller-than-pmd too?
I remember that you have a patch for that, right? :)
>
> I currently have a patch to do this same change in the multi-size THP series.
>
Ah, yes, and that should go in first.
On 05/12/2023 13:09, David Hildenbrand wrote:
>>> +static __always_inline void __folio_remove_rmap(struct folio *folio,
>>> + struct page *page, unsigned int nr_pages,
>>> + struct vm_area_struct *vma, enum rmap_mode mode)
>>> +{
>>> atomic_t *mapped = &folio->_nr_pages_mapped;
>>> - int nr = 0, nr_pmdmapped = 0;
>>> - bool last;
>>> + int last, nr = 0, nr_pmdmapped = 0;
>>
>> nit: you're being inconsistent across the functions with signed vs unsigned for
>> page counts (e.g. nr, nr_pmdmapped) - see __folio_add_rmap(),
>> __folio_add_file_rmap(), __folio_add_anon_rmap().
>>
>
> Definitely.
>
>> I suggest pick one and stick to it. Personally I'd go with signed int (since
>> that's what all the counters in struct folio that we are manipulating are,
>> underneath the atomic_t) then check that nr_pages > 0 in
>> __folio_rmap_sanity_checks().
>
> Can do, but note that the counters are signed to detect underflows. It doesn't
> make sense here to pass a negative number.
I agree it doesn't make sense to pass negative - hence the check.
These 2 functions are inconsistent on size, but agree on signed:
long folio_nr_pages(struct folio *folio)
int folio_nr_pages_mapped(struct folio *folio)
I don't have a strong opinion.
>
>>
>>> enum node_stat_item idx;
>>> - VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
>>> - VM_BUG_ON_PAGE(compound && !PageHead(page), page);
>>> + __folio_rmap_sanity_checks(folio, page, nr_pages, mode);
>>> /* Is page being unmapped by PTE? Is this its last map to be removed? */
>>> - if (likely(!compound)) {
>>> - last = atomic_add_negative(-1, &page->_mapcount);
>>> - nr = last;
>>> - if (last && folio_test_large(folio)) {
>>> - nr = atomic_dec_return_relaxed(mapped);
>>> - nr = (nr < COMPOUND_MAPPED);
>>> - }
>>> - } else if (folio_test_pmd_mappable(folio)) {
>>> - /* That test is redundant: it's for safety or to optimize out */
>>> + if (likely(mode == RMAP_MODE_PTE)) {
>>> + do {
>>> + last = atomic_add_negative(-1, &page->_mapcount);
>>> + if (last && folio_test_large(folio)) {
>>> + last = atomic_dec_return_relaxed(mapped);
>>> + last = (last < COMPOUND_MAPPED);
>>> + }
>>> + if (last)
>>> + nr++;
>>> + } while (page++, --nr_pages > 0);
>>> + } else if (mode == RMAP_MODE_PMD) {
>>> last = atomic_add_negative(-1, &folio->_entire_mapcount);
>>> if (last) {
>>> nr = atomic_sub_return_relaxed(COMPOUND_MAPPED, mapped);
>>> @@ -1517,7 +1528,7 @@ void page_remove_rmap(struct page *page, struct
>>> vm_area_struct *vma,
>>> * is still mapped.
>>> */
>>> if (folio_test_pmd_mappable(folio) && folio_test_anon(folio))
>>
>> folio_test_pmd_mappable() -> folio_test_large()
>>
>> Since you're converting this to support batch PTE removal, it might as well also
>> support smaller-than-pmd too?
>
> I remember that you have a patch for that, right? :)
>
>>
>> I currently have a patch to do this same change in the multi-size THP series.
>>
>
> Ah, yes, and that should go in first.
>
>
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 017b216915f19..dd4ffb1d8ae04 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -241,6 +241,12 @@ void folio_add_file_rmap_pmd(struct folio *, struct page *,
struct vm_area_struct *);
void page_remove_rmap(struct page *, struct vm_area_struct *,
bool compound);
+void folio_remove_rmap_ptes(struct folio *, struct page *, unsigned int nr,
+ struct vm_area_struct *);
+#define folio_remove_rmap_pte(folio, page, vma) \
+ folio_remove_rmap_ptes(folio, page, 1, vma)
+void folio_remove_rmap_pmd(struct folio *, struct page *,
+ struct vm_area_struct *);
void hugetlb_add_anon_rmap(struct folio *, struct vm_area_struct *,
unsigned long address, rmap_t flags);
diff --git a/mm/rmap.c b/mm/rmap.c
index 3587225055c5e..50b6909157ac1 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1463,25 +1463,36 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
bool compound)
{
struct folio *folio = page_folio(page);
+
+ if (likely(!compound))
+ folio_remove_rmap_pte(folio, page, vma);
+ else
+ folio_remove_rmap_pmd(folio, page, vma);
+}
+
+static __always_inline void __folio_remove_rmap(struct folio *folio,
+ struct page *page, unsigned int nr_pages,
+ struct vm_area_struct *vma, enum rmap_mode mode)
+{
atomic_t *mapped = &folio->_nr_pages_mapped;
- int nr = 0, nr_pmdmapped = 0;
- bool last;
+ int last, nr = 0, nr_pmdmapped = 0;
enum node_stat_item idx;
- VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
- VM_BUG_ON_PAGE(compound && !PageHead(page), page);
+ __folio_rmap_sanity_checks(folio, page, nr_pages, mode);
/* Is page being unmapped by PTE? Is this its last map to be removed? */
- if (likely(!compound)) {
- last = atomic_add_negative(-1, &page->_mapcount);
- nr = last;
- if (last && folio_test_large(folio)) {
- nr = atomic_dec_return_relaxed(mapped);
- nr = (nr < COMPOUND_MAPPED);
- }
- } else if (folio_test_pmd_mappable(folio)) {
- /* That test is redundant: it's for safety or to optimize out */
+ if (likely(mode == RMAP_MODE_PTE)) {
+ do {
+ last = atomic_add_negative(-1, &page->_mapcount);
+ if (last && folio_test_large(folio)) {
+ last = atomic_dec_return_relaxed(mapped);
+ last = (last < COMPOUND_MAPPED);
+ }
+ if (last)
+ nr++;
+ } while (page++, --nr_pages > 0);
+ } else if (mode == RMAP_MODE_PMD) {
last = atomic_add_negative(-1, &folio->_entire_mapcount);
if (last) {
nr = atomic_sub_return_relaxed(COMPOUND_MAPPED, mapped);
@@ -1517,7 +1528,7 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
* is still mapped.
*/
if (folio_test_pmd_mappable(folio) && folio_test_anon(folio))
- if (!compound || nr < nr_pmdmapped)
+ if (mode == RMAP_MODE_PTE || nr < nr_pmdmapped)
deferred_split_folio(folio);
}
@@ -1532,6 +1543,43 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
munlock_vma_folio(folio, vma);
}
+/**
+ * folio_remove_rmap_ptes - remove PTE mappings from a page range of a folio
+ * @folio: The folio to remove the mappings from
+ * @page: The first page to remove
+ * @nr_pages: The number of pages that will be removed from the mapping
+ * @vma: The vm area from which the mappings are removed
+ *
+ * The page range of the folio is defined by [page, page + nr_pages)
+ *
+ * The caller needs to hold the page table lock.
+ */
+void folio_remove_rmap_ptes(struct folio *folio, struct page *page,
+ unsigned int nr_pages, struct vm_area_struct *vma)
+{
+ __folio_remove_rmap(folio, page, nr_pages, vma, RMAP_MODE_PTE);
+}
+
+/**
+ * folio_remove_rmap_pmd - remove a PMD mapping from a page range of a folio
+ * @folio: The folio to remove the mapping from
+ * @page: The first page to remove
+ * @vma: The vm area from which the mapping is removed
+ *
+ * The page range of the folio is defined by [page, page + HPAGE_PMD_NR)
+ *
+ * The caller needs to hold the page table lock.
+ */
+void folio_remove_rmap_pmd(struct folio *folio, struct page *page,
+ struct vm_area_struct *vma)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ __folio_remove_rmap(folio, page, HPAGE_PMD_NR, vma, RMAP_MODE_PMD);
+#else
+ WARN_ON_ONCE(true);
+#endif
+}
+
/*
* @arg: enum ttu_flags will be passed to this argument
*/