[v2,05/46] rmap: hugetlb: switch from page_dup_file_rmap to page_add_file_rmap

Message ID 20230218002819.1486479-6-jthoughton@google.com
State New
Series hugetlb: introduce HugeTLB high-granularity mapping

Commit Message

James Houghton Feb. 18, 2023, 12:27 a.m. UTC
This only applies to file-backed HugeTLB, and it should be a no-op until
high-granularity mapping is possible. Also update page_remove_rmap to
support the eventual case where !compound && folio_test_hugetlb().

HugeTLB doesn't use LRU or mlock, so we avoid those bits. This also
means we don't need to use subpage_mapcount; if we did, it would
overflow with only a few mappings.

There is still one caller of page_dup_file_rmap left: copy_present_pte,
and it is always called with compound=false in this case.

Signed-off-by: James Houghton <jthoughton@google.com>
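
For context, a simplified sketch of what the two interfaces do (based on
the rmap code of this period, not verbatim kernel source):
page_dup_file_rmap() only duplicates the mapcount, while
page_add_file_rmap() also maintains _nr_pages_mapped and the file-rmap
statistics that a high-granularity mapping will rely on.

/*
 * Simplified sketch, not verbatim kernel source: what page_dup_file_rmap()
 * boils down to for a file page that is already in the rmap.
 */
static inline void dup_file_rmap_sketch(struct page *page, bool compound)
{
	if (compound)
		atomic_inc(&page_folio(page)->_entire_mapcount);
	else
		atomic_inc(&page->_mapcount);
}

/*
 * page_add_file_rmap(), by contrast, additionally maintains
 * folio->_nr_pages_mapped and the NR_FILE_MAPPED / NR_*_PMDMAPPED
 * statistics, which is the bookkeeping a PTE-level (HGM) mapping of a
 * hugetlb page will need; hence the switch in this patch.
 */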
  

Comments

Jiaqi Yan March 2, 2023, 1:06 a.m. UTC | #1
On Fri, Feb 17, 2023 at 4:28 PM James Houghton <jthoughton@google.com> wrote:
>
> This only applies to file-backed HugeTLB, and it should be a no-op until
> high-granularity mapping is possible. Also update page_remove_rmap to
> support the eventual case where !compound && folio_test_hugetlb().
>
> HugeTLB doesn't use LRU or mlock, so we avoid those bits. This also
> means we don't need to use subpage_mapcount; if we did, it would
> overflow with only a few mappings.
>
> There is still one caller of page_dup_file_rmap left: copy_present_pte,
> and it is always called with compound=false in this case.
>
> Signed-off-by: James Houghton <jthoughton@google.com>
>
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 08004371cfed..6c008c9de80e 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -5077,7 +5077,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
>                          * sleep during the process.
>                          */
>                         if (!PageAnon(ptepage)) {
> -                               page_dup_file_rmap(ptepage, true);
> +                               page_add_file_rmap(ptepage, src_vma, true);
>                         } else if (page_try_dup_anon_rmap(ptepage, true,
>                                                           src_vma)) {
>                                 pte_t src_pte_old = entry;
> @@ -5910,7 +5910,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
>         if (anon_rmap)
>                 hugepage_add_new_anon_rmap(folio, vma, haddr);
>         else
> -               page_dup_file_rmap(&folio->page, true);
> +               page_add_file_rmap(&folio->page, vma, true);
>         new_pte = make_huge_pte(vma, &folio->page, ((vma->vm_flags & VM_WRITE)
>                                 && (vma->vm_flags & VM_SHARED)));
>         /*
> @@ -6301,7 +6301,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
>                 goto out_release_unlock;
>
>         if (folio_in_pagecache)
> -               page_dup_file_rmap(&folio->page, true);
> +               page_add_file_rmap(&folio->page, dst_vma, true);
>         else
>                 hugepage_add_new_anon_rmap(folio, dst_vma, dst_addr);
>
> diff --git a/mm/migrate.c b/mm/migrate.c
> index d3964c414010..b0f87f19b536 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -254,7 +254,7 @@ static bool remove_migration_pte(struct folio *folio,
>                                 hugepage_add_anon_rmap(new, vma, pvmw.address,
>                                                        rmap_flags);
>                         else
> -                               page_dup_file_rmap(new, true);
> +                               page_add_file_rmap(new, vma, true);
>                         set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
>                 } else
>  #endif
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 15ae24585fc4..c010d0af3a82 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c

Given you are making hugetlb's ref/mapcount mechanism consistent with
THP's, I think the special folio_test_hugetlb checks you added in this
commit will break page_mapped() and folio_mapped() if the page/folio is
HGMed. With these checks, folio->_nr_pages_mapped is not properly
increased/decreased.
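
For reference, this is roughly how folio_mapped()/page_mapped() decide
whether a large folio is mapped anywhere (a simplified paraphrase of the
mm code of this period, not the exact source):

static inline bool folio_mapped_sketch(struct folio *folio)
{
	if (likely(!folio_test_large(folio)))
		return atomic_read(&folio->_mapcount) >= 0;

	/* Mapped in its entirety (PMD for THP, huge PTE for hugetlb)? */
	if (atomic_read(&folio->_entire_mapcount) >= 0)
		return true;

	/*
	 * Otherwise only _nr_pages_mapped is consulted; the per-subpage
	 * _mapcounts are never walked.  If the add/remove paths skip this
	 * counter for hugetlb, a PTE-level (HGM) mapping is invisible here.
	 */
	return atomic_read(&folio->_nr_pages_mapped) > 0;
}

With the patch as posted, an HGM mapping only increments the subpage's
_mapcount, so both checks above miss it and the folio looks unmapped.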

> @@ -1318,21 +1318,21 @@ void page_add_file_rmap(struct page *page, struct vm_area_struct *vma,
>         int nr = 0, nr_pmdmapped = 0;
>         bool first;
>
> -       VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
> +       VM_BUG_ON_PAGE(compound && !PageTransHuge(page)
> +                               && !folio_test_hugetlb(folio), page);
>
>         /* Is page being mapped by PTE? Is this its first map to be added? */
>         if (likely(!compound)) {
>                 first = atomic_inc_and_test(&page->_mapcount);
>                 nr = first;
> -               if (first && folio_test_large(folio)) {
> +               if (first && folio_test_large(folio)
> +                         && !folio_test_hugetlb(folio)) {

So we should still increment _nr_pages_mapped for hugetlb case here,
and decrement in the corresponding place in page_remove_rmap.
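
Roughly, the suggested shape (a sketch, not code from this series) would
keep the pre-patch condition here:

	if (likely(!compound)) {
		first = atomic_inc_and_test(&page->_mapcount);
		nr = first;
		/* Treat a PTE-mapped hugetlb folio like any other large folio. */
		if (first && folio_test_large(folio)) {
			nr = atomic_inc_return_relaxed(mapped);
			nr = (nr < COMPOUND_MAPPED);
		}
	}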

>                         nr = atomic_inc_return_relaxed(mapped);
>                         nr = (nr < COMPOUND_MAPPED);
>                 }
> -       } else if (folio_test_pmd_mappable(folio)) {
> -               /* That test is redundant: it's for safety or to optimize out */
> -
> +       } else {
>                 first = atomic_inc_and_test(&folio->_entire_mapcount);
> -               if (first) {
> +               if (first && !folio_test_hugetlb(folio)) {

Same here: we should still increase _nr_pages_mapped by
COMPOUND_MAPPED and decrease by COMPOUND_MAPPED in the corresponding
place in page_remove_rmap.
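
And for the compound case, something like this (sketch only; the
folio_test_hugetlb() early return you add before the stats update would
still keep hugetlb out of the NR_*MAPPED statistics):

	/* An entire (PMD or hugetlb) mapping contributes COMPOUND_MAPPED
	 * to _nr_pages_mapped, so folio_mapped() keeps working under HGM.
	 */
	first = atomic_inc_and_test(&folio->_entire_mapcount);
	if (first) {
		nr = atomic_add_return_relaxed(COMPOUND_MAPPED, mapped);
		if (likely(nr < COMPOUND_MAPPED + COMPOUND_MAPPED)) {
			nr_pmdmapped = folio_nr_pages(folio);
			/* ... as in the existing THP path ... */
		}
	}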

>                         nr = atomic_add_return_relaxed(COMPOUND_MAPPED, mapped);
>                         if (likely(nr < COMPOUND_MAPPED + COMPOUND_MAPPED)) {
>                                 nr_pmdmapped = folio_nr_pages(folio);
> @@ -1347,6 +1347,9 @@ void page_add_file_rmap(struct page *page, struct vm_area_struct *vma,
>                 }
>         }
>
> +       if (folio_test_hugetlb(folio))
> +               return;
> +
>         if (nr_pmdmapped)
>                 __lruvec_stat_mod_folio(folio, folio_test_swapbacked(folio) ?
>                         NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED, nr_pmdmapped);
> @@ -1376,8 +1379,7 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
>         VM_BUG_ON_PAGE(compound && !PageHead(page), page);
>
>         /* Hugetlb pages are not counted in NR_*MAPPED */
> -       if (unlikely(folio_test_hugetlb(folio))) {
> -               /* hugetlb pages are always mapped with pmds */
> +       if (unlikely(folio_test_hugetlb(folio)) && compound) {
>                 atomic_dec(&folio->_entire_mapcount);
>                 return;
>         }

This entire if-block should be removed after you remove the
!folio_test_hugetlb checks in page_add_file_rmap.
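
That is, the remove side would then fall through to the common code,
roughly (sketch, not code from the series):

	/* No hugetlb early exit; the common code handles both the
	 * PTE-mapped and the entire-mapped case, and the new
	 * folio_test_hugetlb() check before the stats update still keeps
	 * hugetlb out of NR_*MAPPED.
	 */
	if (likely(!compound)) {
		last = atomic_add_negative(-1, &page->_mapcount);
		nr = last;
		if (last && folio_test_large(folio)) {
			nr = atomic_dec_return_relaxed(mapped);
			nr = (nr < COMPOUND_MAPPED);
		}
	} else {
		last = atomic_add_negative(-1, &folio->_entire_mapcount);
		if (last) {
			nr = atomic_sub_return_relaxed(COMPOUND_MAPPED, mapped);
			/* ... as in the existing THP path ... */
		}
	}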

> @@ -1386,15 +1388,14 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
>         if (likely(!compound)) {
>                 last = atomic_add_negative(-1, &page->_mapcount);
>                 nr = last;
> -               if (last && folio_test_large(folio)) {
> +               if (last && folio_test_large(folio)
> +                        && !folio_test_hugetlb(folio)) {

ditto.

>                         nr = atomic_dec_return_relaxed(mapped);
>                         nr = (nr < COMPOUND_MAPPED);
>                 }
> -       } else if (folio_test_pmd_mappable(folio)) {
> -               /* That test is redundant: it's for safety or to optimize out */
> -
> +       } else {
>                 last = atomic_add_negative(-1, &folio->_entire_mapcount);
> -               if (last) {
> +               if (last && !folio_test_hugetlb(folio)) {

ditto.

>                         nr = atomic_sub_return_relaxed(COMPOUND_MAPPED, mapped);
>                         if (likely(nr < COMPOUND_MAPPED)) {
>                                 nr_pmdmapped = folio_nr_pages(folio);
> @@ -1409,6 +1410,9 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
>                 }
>         }
>
> +       if (folio_test_hugetlb(folio))
> +               return;
> +
>         if (nr_pmdmapped) {
>                 if (folio_test_anon(folio))
>                         idx = NR_ANON_THPS;
> --
> 2.39.2.637.g21b0678d19-goog
>
  
James Houghton March 2, 2023, 3:44 p.m. UTC | #2
On Wed, Mar 1, 2023 at 5:06 PM Jiaqi Yan <jiaqiyan@google.com> wrote:
>
> On Fri, Feb 17, 2023 at 4:28 PM James Houghton <jthoughton@google.com> wrote:
> >
> > This only applies to file-backed HugeTLB, and it should be a no-op until
> > high-granularity mapping is possible. Also update page_remove_rmap to
> > support the eventual case where !compound && folio_test_hugetlb().
> >
> > HugeTLB doesn't use LRU or mlock, so we avoid those bits. This also
> > means we don't need to use subpage_mapcount; if we did, it would
> > overflow with only a few mappings.

This is wrong; I guess I misunderstood the code when I wrote this
commit. subpages_mapcount (now called _nr_pages_mapped) won't overflow
(unless HugeTLB pages could be greater than 16G). It is indeed a bug
not to update _nr_pages_mapped the same way THPs do.

>
> >
> > There is still one caller of page_dup_file_rmap left: copy_present_pte,
> > and it is always called with compound=false in this case.
> >
> > Signed-off-by: James Houghton <jthoughton@google.com>
> >
> > diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> > index 08004371cfed..6c008c9de80e 100644
> > --- a/mm/hugetlb.c
> > +++ b/mm/hugetlb.c
> > @@ -5077,7 +5077,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
> >                          * sleep during the process.
> >                          */
> >                         if (!PageAnon(ptepage)) {
> > -                               page_dup_file_rmap(ptepage, true);
> > +                               page_add_file_rmap(ptepage, src_vma, true);
> >                         } else if (page_try_dup_anon_rmap(ptepage, true,
> >                                                           src_vma)) {
> >                                 pte_t src_pte_old = entry;
> > @@ -5910,7 +5910,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
> >         if (anon_rmap)
> >                 hugepage_add_new_anon_rmap(folio, vma, haddr);
> >         else
> > -               page_dup_file_rmap(&folio->page, true);
> > +               page_add_file_rmap(&folio->page, vma, true);
> >         new_pte = make_huge_pte(vma, &folio->page, ((vma->vm_flags & VM_WRITE)
> >                                 && (vma->vm_flags & VM_SHARED)));
> >         /*
> > @@ -6301,7 +6301,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
> >                 goto out_release_unlock;
> >
> >         if (folio_in_pagecache)
> > -               page_dup_file_rmap(&folio->page, true);
> > +               page_add_file_rmap(&folio->page, dst_vma, true);
> >         else
> >                 hugepage_add_new_anon_rmap(folio, dst_vma, dst_addr);
> >
> > diff --git a/mm/migrate.c b/mm/migrate.c
> > index d3964c414010..b0f87f19b536 100644
> > --- a/mm/migrate.c
> > +++ b/mm/migrate.c
> > @@ -254,7 +254,7 @@ static bool remove_migration_pte(struct folio *folio,
> >                                 hugepage_add_anon_rmap(new, vma, pvmw.address,
> >                                                        rmap_flags);
> >                         else
> > -                               page_dup_file_rmap(new, true);
> > +                               page_add_file_rmap(new, vma, true);
> >                         set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
> >                 } else
> >  #endif
> > diff --git a/mm/rmap.c b/mm/rmap.c
> > index 15ae24585fc4..c010d0af3a82 100644
> > --- a/mm/rmap.c
> > +++ b/mm/rmap.c
>
> Given you are making hugetlb's ref/mapcount mechanism consistent with
> THP's, I think the special folio_test_hugetlb checks you added in this
> commit will break page_mapped() and folio_mapped() if the page/folio is
> HGMed. With these checks, folio->_nr_pages_mapped is not properly
> increased/decreased.

Thank you, Jiaqi! I didn't realize I broke
folio_mapped()/page_mapped(). The end result is that page_mapped() may
report that an HGMed page isn't mapped when it is. Not good!

>
> > @@ -1318,21 +1318,21 @@ void page_add_file_rmap(struct page *page, struct vm_area_struct *vma,
> >         int nr = 0, nr_pmdmapped = 0;
> >         bool first;
> >
> > -       VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
> > +       VM_BUG_ON_PAGE(compound && !PageTransHuge(page)
> > +                               && !folio_test_hugetlb(folio), page);
> >
> >         /* Is page being mapped by PTE? Is this its first map to be added? */
> >         if (likely(!compound)) {
> >                 first = atomic_inc_and_test(&page->_mapcount);
> >                 nr = first;
> > -               if (first && folio_test_large(folio)) {
> > +               if (first && folio_test_large(folio)
> > +                         && !folio_test_hugetlb(folio)) {
>
> So we should still increment _nr_pages_mapped for hugetlb case here,
> and decrement in the corresponding place in page_remove_rmap.
>
> >                         nr = atomic_inc_return_relaxed(mapped);
> >                         nr = (nr < COMPOUND_MAPPED);
> >                 }
> > -       } else if (folio_test_pmd_mappable(folio)) {
> > -               /* That test is redundant: it's for safety or to optimize out */
> > -
> > +       } else {
> >                 first = atomic_inc_and_test(&folio->_entire_mapcount);
> > -               if (first) {
> > +               if (first && !folio_test_hugetlb(folio)) {
>
> Same here: we should still increase _nr_pages_mapped by
> COMPOUND_MAPPED and decrease by COMPOUND_MAPPED in the corresponding
> place in page_remove_rmap.
>
> >                         nr = atomic_add_return_relaxed(COMPOUND_MAPPED, mapped);
> >                         if (likely(nr < COMPOUND_MAPPED + COMPOUND_MAPPED)) {
> >                                 nr_pmdmapped = folio_nr_pages(folio);
> > @@ -1347,6 +1347,9 @@ void page_add_file_rmap(struct page *page, struct vm_area_struct *vma,
> >                 }
> >         }
> >
> > +       if (folio_test_hugetlb(folio))
> > +               return;
> > +
> >         if (nr_pmdmapped)
> >                 __lruvec_stat_mod_folio(folio, folio_test_swapbacked(folio) ?
> >                         NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED, nr_pmdmapped);
> > @@ -1376,8 +1379,7 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
> >         VM_BUG_ON_PAGE(compound && !PageHead(page), page);
> >
> >         /* Hugetlb pages are not counted in NR_*MAPPED */
> > -       if (unlikely(folio_test_hugetlb(folio))) {
> > -               /* hugetlb pages are always mapped with pmds */
> > +       if (unlikely(folio_test_hugetlb(folio)) && compound) {
> >                 atomic_dec(&folio->_entire_mapcount);
> >                 return;
> >         }
>
> This entire if-block should be removed after you remove the
> !folio_test_hugetlb checks in page_add_file_rmap.

This is the not-so-obvious change that is needed. Thank you!

>
> > @@ -1386,15 +1388,14 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
> >         if (likely(!compound)) {
> >                 last = atomic_add_negative(-1, &page->_mapcount);
> >                 nr = last;
> > -               if (last && folio_test_large(folio)) {
> > +               if (last && folio_test_large(folio)
> > +                        && !folio_test_hugetlb(folio)) {
>
> ditto.
>
> >                         nr = atomic_dec_return_relaxed(mapped);
> >                         nr = (nr < COMPOUND_MAPPED);
> >                 }
> > -       } else if (folio_test_pmd_mappable(folio)) {
> > -               /* That test is redundant: it's for safety or to optimize out */
> > -
> > +       } else {
> >                 last = atomic_add_negative(-1, &folio->_entire_mapcount);
> > -               if (last) {
> > +               if (last && !folio_test_hugetlb(folio)) {
>
> ditto.

I agree with all of your suggestions. Testing with the hugetlb-hgm
selftest, nothing seems to break. :)

Given that this is at least the third or fourth major bug in this
version of the series, I'll go ahead and send a v3 sooner rather than
later.

>
> >                         nr = atomic_sub_return_relaxed(COMPOUND_MAPPED, mapped);
> >                         if (likely(nr < COMPOUND_MAPPED)) {
> >                                 nr_pmdmapped = folio_nr_pages(folio);
> > @@ -1409,6 +1410,9 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
> >                 }
> >         }
> >
> > +       if (folio_test_hugetlb(folio))
> > +               return;
> > +
> >         if (nr_pmdmapped) {
> >                 if (folio_test_anon(folio))
> >                         idx = NR_ANON_THPS;
> > --
> > 2.39.2.637.g21b0678d19-goog
> >
  
James Houghton March 2, 2023, 4:43 p.m. UTC | #3
On Thu, Mar 2, 2023 at 7:44 AM James Houghton <jthoughton@google.com> wrote:
>
> [snip]
>
> This is wrong; I guess I misunderstood the code when I wrote this
> commit. subpages_mapcount (now called _nr_pages_mapped) won't overflow
> (unless HugeTLB pages could be greater than 16G). It is indeed a bug
> not to update _nr_pages_mapped the same way THPs do.
>
> [snip]
>
> I agree with all of your suggestions. Testing with the hugetlb-hgm
> selftest, nothing seems to break. :)
>
> Given that this is at least the third or fourth major bug in this
> version of the series, I'll go ahead and send a v3 sooner rather than
> later.

This solution limits the size of a HugeTLB page to 16G. I'm not sure
if there are any architectures that support HugeTLB pages larger than
16G (it looks like powerpc supports 16G). If they do, I think we can
just increase the value of COMPOUND_MAPPED. If that's not possible, we
would need a solution other than participating in _nr_pages_mapped the
way THPs do.
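
For background, the constraint comes from how _nr_pages_mapped is shared
(the constants below are assumed to match mm/internal.h at the time; they
are not quoted from this thread):

/*
 * Background sketch (assumed values, not taken from this thread):
 *
 *   #define COMPOUND_MAPPED    0x800000               (bit 23)
 *   #define FOLIO_PAGES_MAPPED (COMPOUND_MAPPED - 1)
 *
 * PTE mappings of individual subpages are counted in the bits below
 * COMPOUND_MAPPED, while each entire (PMD/hugetlb) mapping adds a full
 * COMPOUND_MAPPED, all in the same signed 32-bit atomic.  The scheme
 * therefore only works while folio_nr_pages() stays below COMPOUND_MAPPED;
 * moving COMPOUND_MAPPED to a higher bit buys room for larger folios at
 * the cost of fewer simultaneous entire mappings fitting in the counter.
 */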
  
Mike Kravetz March 2, 2023, 7:22 p.m. UTC | #4
On 03/02/23 08:43, James Houghton wrote:
> 
> This solution limits the size of a HugeTLB page to 16G. I'm not sure
> if there are any architectures that support HugeTLB pages larger than
> 16G (it looks like powerpc supports 16G). If they do, I think we can
> just increase the value of COMPOUND_MAPPED. If that's not possible, we
> would need another solution than participating in _nr_pages_mapped
> like THPs.

For now, I believe you should continue with the THP like approach.

A couple days back, Matthew asked me to take a look and comment on his
latest mapcount proposal.  I have not done that yet. :( The hope is that
this will make hugetlb HGM simpler as well.

It is somewhat troubling that mapcount may be changed in the near future.
Better to be consistent with THP than to invent something hugetlb-HGM-specific.
That way, when THP is updated, the hugetlb updates should be nearly the same.
  

Patch

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 08004371cfed..6c008c9de80e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5077,7 +5077,7 @@  int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 			 * sleep during the process.
 			 */
 			if (!PageAnon(ptepage)) {
-				page_dup_file_rmap(ptepage, true);
+				page_add_file_rmap(ptepage, src_vma, true);
 			} else if (page_try_dup_anon_rmap(ptepage, true,
 							  src_vma)) {
 				pte_t src_pte_old = entry;
@@ -5910,7 +5910,7 @@  static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 	if (anon_rmap)
 		hugepage_add_new_anon_rmap(folio, vma, haddr);
 	else
-		page_dup_file_rmap(&folio->page, true);
+		page_add_file_rmap(&folio->page, vma, true);
 	new_pte = make_huge_pte(vma, &folio->page, ((vma->vm_flags & VM_WRITE)
 				&& (vma->vm_flags & VM_SHARED)));
 	/*
@@ -6301,7 +6301,7 @@  int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 		goto out_release_unlock;
 
 	if (folio_in_pagecache)
-		page_dup_file_rmap(&folio->page, true);
+		page_add_file_rmap(&folio->page, dst_vma, true);
 	else
 		hugepage_add_new_anon_rmap(folio, dst_vma, dst_addr);
 
diff --git a/mm/migrate.c b/mm/migrate.c
index d3964c414010..b0f87f19b536 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -254,7 +254,7 @@  static bool remove_migration_pte(struct folio *folio,
 				hugepage_add_anon_rmap(new, vma, pvmw.address,
 						       rmap_flags);
 			else
-				page_dup_file_rmap(new, true);
+				page_add_file_rmap(new, vma, true);
 			set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
 		} else
 #endif
diff --git a/mm/rmap.c b/mm/rmap.c
index 15ae24585fc4..c010d0af3a82 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1318,21 +1318,21 @@  void page_add_file_rmap(struct page *page, struct vm_area_struct *vma,
 	int nr = 0, nr_pmdmapped = 0;
 	bool first;
 
-	VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
+	VM_BUG_ON_PAGE(compound && !PageTransHuge(page)
+				&& !folio_test_hugetlb(folio), page);
 
 	/* Is page being mapped by PTE? Is this its first map to be added? */
 	if (likely(!compound)) {
 		first = atomic_inc_and_test(&page->_mapcount);
 		nr = first;
-		if (first && folio_test_large(folio)) {
+		if (first && folio_test_large(folio)
+			  && !folio_test_hugetlb(folio)) {
 			nr = atomic_inc_return_relaxed(mapped);
 			nr = (nr < COMPOUND_MAPPED);
 		}
-	} else if (folio_test_pmd_mappable(folio)) {
-		/* That test is redundant: it's for safety or to optimize out */
-
+	} else {
 		first = atomic_inc_and_test(&folio->_entire_mapcount);
-		if (first) {
+		if (first && !folio_test_hugetlb(folio)) {
 			nr = atomic_add_return_relaxed(COMPOUND_MAPPED, mapped);
 			if (likely(nr < COMPOUND_MAPPED + COMPOUND_MAPPED)) {
 				nr_pmdmapped = folio_nr_pages(folio);
@@ -1347,6 +1347,9 @@  void page_add_file_rmap(struct page *page, struct vm_area_struct *vma,
 		}
 	}
 
+	if (folio_test_hugetlb(folio))
+		return;
+
 	if (nr_pmdmapped)
 		__lruvec_stat_mod_folio(folio, folio_test_swapbacked(folio) ?
 			NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED, nr_pmdmapped);
@@ -1376,8 +1379,7 @@  void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
 	VM_BUG_ON_PAGE(compound && !PageHead(page), page);
 
 	/* Hugetlb pages are not counted in NR_*MAPPED */
-	if (unlikely(folio_test_hugetlb(folio))) {
-		/* hugetlb pages are always mapped with pmds */
+	if (unlikely(folio_test_hugetlb(folio)) && compound) {
 		atomic_dec(&folio->_entire_mapcount);
 		return;
 	}
@@ -1386,15 +1388,14 @@  void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
 	if (likely(!compound)) {
 		last = atomic_add_negative(-1, &page->_mapcount);
 		nr = last;
-		if (last && folio_test_large(folio)) {
+		if (last && folio_test_large(folio)
+			 && !folio_test_hugetlb(folio)) {
 			nr = atomic_dec_return_relaxed(mapped);
 			nr = (nr < COMPOUND_MAPPED);
 		}
-	} else if (folio_test_pmd_mappable(folio)) {
-		/* That test is redundant: it's for safety or to optimize out */
-
+	} else {
 		last = atomic_add_negative(-1, &folio->_entire_mapcount);
-		if (last) {
+		if (last && !folio_test_hugetlb(folio)) {
 			nr = atomic_sub_return_relaxed(COMPOUND_MAPPED, mapped);
 			if (likely(nr < COMPOUND_MAPPED)) {
 				nr_pmdmapped = folio_nr_pages(folio);
@@ -1409,6 +1410,9 @@  void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
 		}
 	}
 
+	if (folio_test_hugetlb(folio))
+		return;
+
 	if (nr_pmdmapped) {
 		if (folio_test_anon(folio))
 			idx = NR_ANON_THPS;