memcontrol: only transfer the memcg data for migration

Message ID 20231003231422.4046187-1-nphamcs@gmail.com
State New
Headers
Series memcontrol: only transfer the memcg data for migration |

Commit Message

Nhat Pham Oct. 3, 2023, 11:14 p.m. UTC
  For most migration use cases, only transfer the memcg data from the old
folio to the new folio, and clear the old folio's memcg data. No
charging and uncharging will be done. These use cases include the new
hugetlb memcg accounting behavior (which was not previously handled).

This shaves off some work on the migration path, and avoids the
temporary double charging of a folio during its migration.

The only exception is replace_page_cache_folio(), which will use the old
mem_cgroup_migrate() (now renamed to mem_cgroup_replace_folio). In that
context, the isolation of the old page isn't quite as thorough as with
migration, so we cannot use our new implementation directly.

This patch is the result of the following discussion on the new hugetlb
memcg accounting behavior:

https://lore.kernel.org/lkml/20231003171329.GB314430@monkey/

Reported-by: Mike Kravetz <mike.kravetz@oracle.com>
Closes: https://lore.kernel.org/lkml/20231003171329.GB314430@monkey/
Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Nhat Pham <nphamcs@gmail.com>
---
 include/linux/memcontrol.h |  7 ++++++
 mm/filemap.c               |  2 +-
 mm/memcontrol.c            | 45 +++++++++++++++++++++++++++++++++++---
 mm/migrate.c               |  3 +--
 4 files changed, 51 insertions(+), 6 deletions(-)
  

Comments

Yosry Ahmed Oct. 3, 2023, 11:22 p.m. UTC | #1
On Tue, Oct 3, 2023 at 4:14 PM Nhat Pham <nphamcs@gmail.com> wrote:
>
> For most migration use cases, only transfer the memcg data from the old
> folio to the new folio, and clear the old folio's memcg data. No
> charging and uncharging will be done. These use cases include the new
> hugetlb memcg accounting behavior (which was not previously handled).
>
> This shaves off some work on the migration path, and avoids the
> temporary double charging of a folio during its migration.
>
> The only exception is replace_page_cache_folio(), which will use the old
> mem_cgroup_migrate() (now renamed to mem_cgroup_replace_folio). In that
> context, the isolation of the old page isn't quite as thorough as with
> migration, so we cannot use our new implementation directly.
>
> This patch is the result of the following discussion on the new hugetlb
> memcg accounting behavior:
>
> https://lore.kernel.org/lkml/20231003171329.GB314430@monkey/
>
> Reported-by: Mike Kravetz <mike.kravetz@oracle.com>
> Closes: https://lore.kernel.org/lkml/20231003171329.GB314430@monkey/
> Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
> Signed-off-by: Nhat Pham <nphamcs@gmail.com>

Does this patch fit before or after your series? In both cases I think
there might be a problem for bisectability.

> ---
>  include/linux/memcontrol.h |  7 ++++++
>  mm/filemap.c               |  2 +-
>  mm/memcontrol.c            | 45 +++++++++++++++++++++++++++++++++++---
>  mm/migrate.c               |  3 +--
>  4 files changed, 51 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index a827e2129790..e3eaa123256b 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -711,6 +711,8 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
>
>  void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages);
>
> +void mem_cgroup_replace_folio(struct folio *old, struct folio *new);
> +
>  void mem_cgroup_migrate(struct folio *old, struct folio *new);
>
>  /**
> @@ -1294,6 +1296,11 @@ static inline void mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
>  {
>  }
>
> +static inline void mem_cgroup_replace_folio(struct folio *old,
> +               struct folio *new)
> +{
> +}
> +
>  static inline void mem_cgroup_migrate(struct folio *old, struct folio *new)
>  {
>  }
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 9481ffaf24e6..673745219c82 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -819,7 +819,7 @@ void replace_page_cache_folio(struct folio *old, struct folio *new)
>         new->mapping = mapping;
>         new->index = offset;
>
> -       mem_cgroup_migrate(old, new);
> +       mem_cgroup_replace_folio(old, new);
>
>         xas_lock_irq(&xas);
>         xas_store(&xas, new);
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 6660684f6f97..cbaa26605b3d 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -7316,16 +7316,17 @@ void __mem_cgroup_uncharge_list(struct list_head *page_list)
>  }
>
>  /**
> - * mem_cgroup_migrate - Charge a folio's replacement.
> + * mem_cgroup_replace_folio - Charge a folio's replacement.
>   * @old: Currently circulating folio.
>   * @new: Replacement folio.
>   *
>   * Charge @new as a replacement folio for @old. @old will
> - * be uncharged upon free.
> + * be uncharged upon free. This is only used by the page cache
> + * (in replace_page_cache_folio()).
>   *
>   * Both folios must be locked, @new->mapping must be set up.
>   */
> -void mem_cgroup_migrate(struct folio *old, struct folio *new)
> +void mem_cgroup_replace_folio(struct folio *old, struct folio *new)
>  {
>         struct mem_cgroup *memcg;
>         long nr_pages = folio_nr_pages(new);
> @@ -7364,6 +7365,44 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new)
>         local_irq_restore(flags);
>  }
>
> +/**
> + * mem_cgroup_migrate - Transfer the memcg data from the old to the new folio.
> + * @old: Currently circulating folio.
> + * @new: Replacement folio.
> + *
> + * Transfer the memcg data from the old folio to the new folio for migration.
> + * The old folio's data info will be cleared. Note that the memory counters
> + * will remain unchanged throughout the process.
> + *
> + * Both folios must be locked, @new->mapping must be set up.
> + */
> +void mem_cgroup_migrate(struct folio *old, struct folio *new)
> +{
> +       struct mem_cgroup *memcg;
> +
> +       VM_BUG_ON_FOLIO(!folio_test_locked(old), old);
> +       VM_BUG_ON_FOLIO(!folio_test_locked(new), new);
> +       VM_BUG_ON_FOLIO(folio_test_anon(old) != folio_test_anon(new), new);
> +       VM_BUG_ON_FOLIO(folio_nr_pages(old) != folio_nr_pages(new), new);
> +
> +       if (mem_cgroup_disabled())
> +               return;
> +
> +       memcg = folio_memcg(old);
> +       /*
> +        * Note that it is normal to see !memcg for a hugetlb folio.
> +        * It could have been allocated when memory_hugetlb_accounting was not
> +        * selected, for e.g.
> +        */
> +       VM_WARN_ON_ONCE_FOLIO(!memcg, old);
> +       if (!memcg)
> +               return;
> +
> +       /* Transfer the charge and the css ref */
> +       commit_charge(new, memcg);
> +       old->memcg_data = 0;
> +}
> +
>  DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key);
>  EXPORT_SYMBOL(memcg_sockets_enabled_key);
>
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 7d1804c4a5d9..6034c7ed1d65 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -633,8 +633,7 @@ void folio_migrate_flags(struct folio *newfolio, struct folio *folio)
>
>         folio_copy_owner(newfolio, folio);
>
> -       if (!folio_test_hugetlb(folio))
> -               mem_cgroup_migrate(folio, newfolio);
> +       mem_cgroup_migrate(folio, newfolio);
>  }
>  EXPORT_SYMBOL(folio_migrate_flags);
>
> --
> 2.34.1
>
  
Nhat Pham Oct. 3, 2023, 11:31 p.m. UTC | #2
On Tue, Oct 3, 2023 at 4:22 PM Yosry Ahmed <yosryahmed@google.com> wrote:
>
> On Tue, Oct 3, 2023 at 4:14 PM Nhat Pham <nphamcs@gmail.com> wrote:
> >
> > For most migration use cases, only transfer the memcg data from the old
> > folio to the new folio, and clear the old folio's memcg data. No
> > charging and uncharging will be done. These use cases include the new
> > hugetlb memcg accounting behavior (which was not previously handled).
> >
> > This shaves off some work on the migration path, and avoids the
> > temporary double charging of a folio during its migration.
> >
> > The only exception is replace_page_cache_folio(), which will use the old
> > mem_cgroup_migrate() (now renamed to mem_cgroup_replace_folio). In that
> > context, the isolation of the old page isn't quite as thorough as with
> > migration, so we cannot use our new implementation directly.
> >
> > This patch is the result of the following discussion on the new hugetlb
> > memcg accounting behavior:
> >
> > https://lore.kernel.org/lkml/20231003171329.GB314430@monkey/
> >
> > Reported-by: Mike Kravetz <mike.kravetz@oracle.com>
> > Closes: https://lore.kernel.org/lkml/20231003171329.GB314430@monkey/
> > Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
> > Signed-off-by: Nhat Pham <nphamcs@gmail.com>
>
> Does this patch fit before or after your series? In both cases I think
> there might be a problem for bisectability.

Hmm my intention for this patch is as a fixlet.
(i.e it should be eventually squashed to the second patch of that series).
I just include the extra context on the fixlet for review purposes.

My apologies - should have been much clearer.
(Perhaps I should just send out v4 at this point?)

>
> > ---
> >  include/linux/memcontrol.h |  7 ++++++
> >  mm/filemap.c               |  2 +-
> >  mm/memcontrol.c            | 45 +++++++++++++++++++++++++++++++++++---
> >  mm/migrate.c               |  3 +--
> >  4 files changed, 51 insertions(+), 6 deletions(-)
> >
> > diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> > index a827e2129790..e3eaa123256b 100644
> > --- a/include/linux/memcontrol.h
> > +++ b/include/linux/memcontrol.h
> > @@ -711,6 +711,8 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
> >
> >  void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages);
> >
> > +void mem_cgroup_replace_folio(struct folio *old, struct folio *new);
> > +
> >  void mem_cgroup_migrate(struct folio *old, struct folio *new);
> >
> >  /**
> > @@ -1294,6 +1296,11 @@ static inline void mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
> >  {
> >  }
> >
> > +static inline void mem_cgroup_replace_folio(struct folio *old,
> > +               struct folio *new)
> > +{
> > +}
> > +
> >  static inline void mem_cgroup_migrate(struct folio *old, struct folio *new)
> >  {
> >  }
> > diff --git a/mm/filemap.c b/mm/filemap.c
> > index 9481ffaf24e6..673745219c82 100644
> > --- a/mm/filemap.c
> > +++ b/mm/filemap.c
> > @@ -819,7 +819,7 @@ void replace_page_cache_folio(struct folio *old, struct folio *new)
> >         new->mapping = mapping;
> >         new->index = offset;
> >
> > -       mem_cgroup_migrate(old, new);
> > +       mem_cgroup_replace_folio(old, new);
> >
> >         xas_lock_irq(&xas);
> >         xas_store(&xas, new);
> > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > index 6660684f6f97..cbaa26605b3d 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -7316,16 +7316,17 @@ void __mem_cgroup_uncharge_list(struct list_head *page_list)
> >  }
> >
> >  /**
> > - * mem_cgroup_migrate - Charge a folio's replacement.
> > + * mem_cgroup_replace_folio - Charge a folio's replacement.
> >   * @old: Currently circulating folio.
> >   * @new: Replacement folio.
> >   *
> >   * Charge @new as a replacement folio for @old. @old will
> > - * be uncharged upon free.
> > + * be uncharged upon free. This is only used by the page cache
> > + * (in replace_page_cache_folio()).
> >   *
> >   * Both folios must be locked, @new->mapping must be set up.
> >   */
> > -void mem_cgroup_migrate(struct folio *old, struct folio *new)
> > +void mem_cgroup_replace_folio(struct folio *old, struct folio *new)
> >  {
> >         struct mem_cgroup *memcg;
> >         long nr_pages = folio_nr_pages(new);
> > @@ -7364,6 +7365,44 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new)
> >         local_irq_restore(flags);
> >  }
> >
> > +/**
> > + * mem_cgroup_migrate - Transfer the memcg data from the old to the new folio.
> > + * @old: Currently circulating folio.
> > + * @new: Replacement folio.
> > + *
> > + * Transfer the memcg data from the old folio to the new folio for migration.
> > + * The old folio's data info will be cleared. Note that the memory counters
> > + * will remain unchanged throughout the process.
> > + *
> > + * Both folios must be locked, @new->mapping must be set up.
> > + */
> > +void mem_cgroup_migrate(struct folio *old, struct folio *new)
> > +{
> > +       struct mem_cgroup *memcg;
> > +
> > +       VM_BUG_ON_FOLIO(!folio_test_locked(old), old);
> > +       VM_BUG_ON_FOLIO(!folio_test_locked(new), new);
> > +       VM_BUG_ON_FOLIO(folio_test_anon(old) != folio_test_anon(new), new);
> > +       VM_BUG_ON_FOLIO(folio_nr_pages(old) != folio_nr_pages(new), new);
> > +
> > +       if (mem_cgroup_disabled())
> > +               return;
> > +
> > +       memcg = folio_memcg(old);
> > +       /*
> > +        * Note that it is normal to see !memcg for a hugetlb folio.
> > +        * It could have been allocated when memory_hugetlb_accounting was not
> > +        * selected, for e.g.
> > +        */
> > +       VM_WARN_ON_ONCE_FOLIO(!memcg, old);
> > +       if (!memcg)
> > +               return;
> > +
> > +       /* Transfer the charge and the css ref */
> > +       commit_charge(new, memcg);
> > +       old->memcg_data = 0;
> > +}
> > +
> >  DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key);
> >  EXPORT_SYMBOL(memcg_sockets_enabled_key);
> >
> > diff --git a/mm/migrate.c b/mm/migrate.c
> > index 7d1804c4a5d9..6034c7ed1d65 100644
> > --- a/mm/migrate.c
> > +++ b/mm/migrate.c
> > @@ -633,8 +633,7 @@ void folio_migrate_flags(struct folio *newfolio, struct folio *folio)
> >
> >         folio_copy_owner(newfolio, folio);
> >
> > -       if (!folio_test_hugetlb(folio))
> > -               mem_cgroup_migrate(folio, newfolio);
> > +       mem_cgroup_migrate(folio, newfolio);
> >  }
> >  EXPORT_SYMBOL(folio_migrate_flags);
> >
> > --
> > 2.34.1
> >
  
Yosry Ahmed Oct. 3, 2023, 11:54 p.m. UTC | #3
On Tue, Oct 3, 2023 at 4:31 PM Nhat Pham <nphamcs@gmail.com> wrote:
>
> On Tue, Oct 3, 2023 at 4:22 PM Yosry Ahmed <yosryahmed@google.com> wrote:
> >
> > On Tue, Oct 3, 2023 at 4:14 PM Nhat Pham <nphamcs@gmail.com> wrote:
> > >
> > > For most migration use cases, only transfer the memcg data from the old
> > > folio to the new folio, and clear the old folio's memcg data. No
> > > charging and uncharging will be done. These use cases include the new
> > > hugetlb memcg accounting behavior (which was not previously handled).
> > >
> > > This shaves off some work on the migration path, and avoids the
> > > temporary double charging of a folio during its migration.
> > >
> > > The only exception is replace_page_cache_folio(), which will use the old
> > > mem_cgroup_migrate() (now renamed to mem_cgroup_replace_folio). In that
> > > context, the isolation of the old page isn't quite as thorough as with
> > > migration, so we cannot use our new implementation directly.
> > >
> > > This patch is the result of the following discussion on the new hugetlb
> > > memcg accounting behavior:
> > >
> > > https://lore.kernel.org/lkml/20231003171329.GB314430@monkey/
> > >
> > > Reported-by: Mike Kravetz <mike.kravetz@oracle.com>
> > > Closes: https://lore.kernel.org/lkml/20231003171329.GB314430@monkey/
> > > Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
> > > Signed-off-by: Nhat Pham <nphamcs@gmail.com>
> >
> > Does this patch fit before or after your series? In both cases I think
> > there might be a problem for bisectability.
>
> Hmm my intention for this patch is as a fixlet.
> (i.e it should be eventually squashed to the second patch of that series).
> I just include the extra context on the fixlet for review purposes.
>
> My apologies - should have been much clearer.
> (Perhaps I should just send out v4 at this point?)
>

It's really up to Andrew, just make it clear what the intention is.

Thanks!
  
Nhat Pham Oct. 4, 2023, 12:02 a.m. UTC | #4
On Tue, Oct 3, 2023 at 4:14 PM Nhat Pham <nphamcs@gmail.com> wrote:
>
> For most migration use cases, only transfer the memcg data from the old
> folio to the new folio, and clear the old folio's memcg data. No
> charging and uncharging will be done. These use cases include the new
> hugetlb memcg accounting behavior (which was not previously handled).
>
> This shaves off some work on the migration path, and avoids the
> temporary double charging of a folio during its migration.
>
> The only exception is replace_page_cache_folio(), which will use the old
> mem_cgroup_migrate() (now renamed to mem_cgroup_replace_folio). In that
> context, the isolation of the old page isn't quite as thorough as with
> migration, so we cannot use our new implementation directly.
>
> This patch is the result of the following discussion on the new hugetlb
> memcg accounting behavior:
>
> https://lore.kernel.org/lkml/20231003171329.GB314430@monkey/

For Andrew:
This fixes the following patch (which has not been merged to stable):

https://lore.kernel.org/lkml/20231003001828.2554080-3-nphamcs@gmail.com/

and should ideally be squashed to it. My apologies for not making it clear
in the changelog. Let me know if you'd like to see a new version instead
of this fixlet.

>
> Reported-by: Mike Kravetz <mike.kravetz@oracle.com>
> Closes: https://lore.kernel.org/lkml/20231003171329.GB314430@monkey/
> Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
> Signed-off-by: Nhat Pham <nphamcs@gmail.com>
> ---
>  include/linux/memcontrol.h |  7 ++++++
>  mm/filemap.c               |  2 +-
>  mm/memcontrol.c            | 45 +++++++++++++++++++++++++++++++++++---
>  mm/migrate.c               |  3 +--
>  4 files changed, 51 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index a827e2129790..e3eaa123256b 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -711,6 +711,8 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
>
>  void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages);
>
> +void mem_cgroup_replace_folio(struct folio *old, struct folio *new);
> +
>  void mem_cgroup_migrate(struct folio *old, struct folio *new);
>
>  /**
> @@ -1294,6 +1296,11 @@ static inline void mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
>  {
>  }
>
> +static inline void mem_cgroup_replace_folio(struct folio *old,
> +               struct folio *new)
> +{
> +}
> +
>  static inline void mem_cgroup_migrate(struct folio *old, struct folio *new)
>  {
>  }
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 9481ffaf24e6..673745219c82 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -819,7 +819,7 @@ void replace_page_cache_folio(struct folio *old, struct folio *new)
>         new->mapping = mapping;
>         new->index = offset;
>
> -       mem_cgroup_migrate(old, new);
> +       mem_cgroup_replace_folio(old, new);
>
>         xas_lock_irq(&xas);
>         xas_store(&xas, new);
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 6660684f6f97..cbaa26605b3d 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -7316,16 +7316,17 @@ void __mem_cgroup_uncharge_list(struct list_head *page_list)
>  }
>
>  /**
> - * mem_cgroup_migrate - Charge a folio's replacement.
> + * mem_cgroup_replace_folio - Charge a folio's replacement.
>   * @old: Currently circulating folio.
>   * @new: Replacement folio.
>   *
>   * Charge @new as a replacement folio for @old. @old will
> - * be uncharged upon free.
> + * be uncharged upon free. This is only used by the page cache
> + * (in replace_page_cache_folio()).
>   *
>   * Both folios must be locked, @new->mapping must be set up.
>   */
> -void mem_cgroup_migrate(struct folio *old, struct folio *new)
> +void mem_cgroup_replace_folio(struct folio *old, struct folio *new)
>  {
>         struct mem_cgroup *memcg;
>         long nr_pages = folio_nr_pages(new);
> @@ -7364,6 +7365,44 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new)
>         local_irq_restore(flags);
>  }
>
> +/**
> + * mem_cgroup_migrate - Transfer the memcg data from the old to the new folio.
> + * @old: Currently circulating folio.
> + * @new: Replacement folio.
> + *
> + * Transfer the memcg data from the old folio to the new folio for migration.
> + * The old folio's data info will be cleared. Note that the memory counters
> + * will remain unchanged throughout the process.
> + *
> + * Both folios must be locked, @new->mapping must be set up.
> + */
> +void mem_cgroup_migrate(struct folio *old, struct folio *new)
> +{
> +       struct mem_cgroup *memcg;
> +
> +       VM_BUG_ON_FOLIO(!folio_test_locked(old), old);
> +       VM_BUG_ON_FOLIO(!folio_test_locked(new), new);
> +       VM_BUG_ON_FOLIO(folio_test_anon(old) != folio_test_anon(new), new);
> +       VM_BUG_ON_FOLIO(folio_nr_pages(old) != folio_nr_pages(new), new);
> +
> +       if (mem_cgroup_disabled())
> +               return;
> +
> +       memcg = folio_memcg(old);
> +       /*
> +        * Note that it is normal to see !memcg for a hugetlb folio.
> +        * It could have been allocated when memory_hugetlb_accounting was not
> +        * selected, for e.g.
> +        */
> +       VM_WARN_ON_ONCE_FOLIO(!memcg, old);
> +       if (!memcg)
> +               return;
> +
> +       /* Transfer the charge and the css ref */
> +       commit_charge(new, memcg);
> +       old->memcg_data = 0;
> +}
> +
>  DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key);
>  EXPORT_SYMBOL(memcg_sockets_enabled_key);
>
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 7d1804c4a5d9..6034c7ed1d65 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -633,8 +633,7 @@ void folio_migrate_flags(struct folio *newfolio, struct folio *folio)
>
>         folio_copy_owner(newfolio, folio);
>
> -       if (!folio_test_hugetlb(folio))
> -               mem_cgroup_migrate(folio, newfolio);
> +       mem_cgroup_migrate(folio, newfolio);
>  }
>  EXPORT_SYMBOL(folio_migrate_flags);
>
> --
> 2.34.1
>
  
Nhat Pham Oct. 4, 2023, 12:02 a.m. UTC | #5
On Tue, Oct 3, 2023 at 4:54 PM Yosry Ahmed <yosryahmed@google.com> wrote:
>
> On Tue, Oct 3, 2023 at 4:31 PM Nhat Pham <nphamcs@gmail.com> wrote:
> >
> > On Tue, Oct 3, 2023 at 4:22 PM Yosry Ahmed <yosryahmed@google.com> wrote:
> > >
> > > On Tue, Oct 3, 2023 at 4:14 PM Nhat Pham <nphamcs@gmail.com> wrote:
> > > >
> > > > For most migration use cases, only transfer the memcg data from the old
> > > > folio to the new folio, and clear the old folio's memcg data. No
> > > > charging and uncharging will be done. These use cases include the new
> > > > hugetlb memcg accounting behavior (which was not previously handled).
> > > >
> > > > This shaves off some work on the migration path, and avoids the
> > > > temporary double charging of a folio during its migration.
> > > >
> > > > The only exception is replace_page_cache_folio(), which will use the old
> > > > mem_cgroup_migrate() (now renamed to mem_cgroup_replace_folio). In that
> > > > context, the isolation of the old page isn't quite as thorough as with
> > > > migration, so we cannot use our new implementation directly.
> > > >
> > > > This patch is the result of the following discussion on the new hugetlb
> > > > memcg accounting behavior:
> > > >
> > > > https://lore.kernel.org/lkml/20231003171329.GB314430@monkey/
> > > >
> > > > Reported-by: Mike Kravetz <mike.kravetz@oracle.com>
> > > > Closes: https://lore.kernel.org/lkml/20231003171329.GB314430@monkey/
> > > > Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
> > > > Signed-off-by: Nhat Pham <nphamcs@gmail.com>
> > >
> > > Does this patch fit before or after your series? In both cases I think
> > > there might be a problem for bisectability.
> >
> > Hmm my intention for this patch is as a fixlet.
> > (i.e it should be eventually squashed to the second patch of that series).
> > I just include the extra context on the fixlet for review purposes.
> >
> > My apologies - should have been much clearer.
> > (Perhaps I should just send out v4 at this point?)
> >
>
> It's really up to Andrew, just make it clear what the intention is.

Thanks for reminding me! That was my oversight.

>
> Thanks!
  
Johannes Weiner Oct. 4, 2023, 2:17 p.m. UTC | #6
On Tue, Oct 03, 2023 at 04:14:22PM -0700, Nhat Pham wrote:
> For most migration use cases, only transfer the memcg data from the old
> folio to the new folio, and clear the old folio's memcg data. No
> charging and uncharging will be done. These use cases include the new
> hugetlb memcg accounting behavior (which was not previously handled).
> 
> This shaves off some work on the migration path, and avoids the
> temporary double charging of a folio during its migration.
> 
> The only exception is replace_page_cache_folio(), which will use the old
> mem_cgroup_migrate() (now renamed to mem_cgroup_replace_folio). In that
> context, the isolation of the old page isn't quite as thorough as with
> migration, so we cannot use our new implementation directly.
> 
> This patch is the result of the following discussion on the new hugetlb
> memcg accounting behavior:
> 
> https://lore.kernel.org/lkml/20231003171329.GB314430@monkey/
> 
> Reported-by: Mike Kravetz <mike.kravetz@oracle.com>
> Closes: https://lore.kernel.org/lkml/20231003171329.GB314430@monkey/
> Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
> Signed-off-by: Nhat Pham <nphamcs@gmail.com>

For squashing, the patch title should be:

hugetlb: memcg: account hugetlb-backed memory in memory controller fix

However, I think this should actually be split out. It changes how all
pages are cgroup-migrated, which is a bit too large of a side effect
for the hugetlb accounting patch itself. Especially because the
reasoning outlined above will get lost once this fixup is folded.

IOW, send one prep patch, to go before the series, which splits
mem_cgroup_replace_folio() and does the mem_cgroup_migrate()
optimization() with the above explanation.

Then send a fixlet for the hugetlb accounting patch that removes the
!hugetlb-conditional for the mem_cgroup_migrate() call.

If you're clear in the queueing instructions for both patches, Andrew
can probably do it in-place without having to resend everything :)

> +void mem_cgroup_migrate(struct folio *old, struct folio *new)
> +{
> +	struct mem_cgroup *memcg;
> +
> +	VM_BUG_ON_FOLIO(!folio_test_locked(old), old);
> +	VM_BUG_ON_FOLIO(!folio_test_locked(new), new);
> +	VM_BUG_ON_FOLIO(folio_test_anon(old) != folio_test_anon(new), new);
> +	VM_BUG_ON_FOLIO(folio_nr_pages(old) != folio_nr_pages(new), new);
> +
> +	if (mem_cgroup_disabled())
> +		return;
> +
> +	memcg = folio_memcg(old);
> +	/*
> +	 * Note that it is normal to see !memcg for a hugetlb folio.
> +	 * It could have been allocated when memory_hugetlb_accounting was not
> +	 * selected, for e.g.

Is that sentence truncated?

> +	 */
> +	VM_WARN_ON_ONCE_FOLIO(!memcg, old);
> +	if (!memcg)
> +		return;

If this is expected to happen, it shouldn't warn:

VM_WARN_ON_ONCE(!folio_test_hugetlb(old) && !memcg, old);
  

Patch

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index a827e2129790..e3eaa123256b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -711,6 +711,8 @@  static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
 
 void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages);
 
+void mem_cgroup_replace_folio(struct folio *old, struct folio *new);
+
 void mem_cgroup_migrate(struct folio *old, struct folio *new);
 
 /**
@@ -1294,6 +1296,11 @@  static inline void mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
 {
 }
 
+static inline void mem_cgroup_replace_folio(struct folio *old,
+		struct folio *new)
+{
+}
+
 static inline void mem_cgroup_migrate(struct folio *old, struct folio *new)
 {
 }
diff --git a/mm/filemap.c b/mm/filemap.c
index 9481ffaf24e6..673745219c82 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -819,7 +819,7 @@  void replace_page_cache_folio(struct folio *old, struct folio *new)
 	new->mapping = mapping;
 	new->index = offset;
 
-	mem_cgroup_migrate(old, new);
+	mem_cgroup_replace_folio(old, new);
 
 	xas_lock_irq(&xas);
 	xas_store(&xas, new);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6660684f6f97..cbaa26605b3d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -7316,16 +7316,17 @@  void __mem_cgroup_uncharge_list(struct list_head *page_list)
 }
 
 /**
- * mem_cgroup_migrate - Charge a folio's replacement.
+ * mem_cgroup_replace_folio - Charge a folio's replacement.
  * @old: Currently circulating folio.
  * @new: Replacement folio.
  *
  * Charge @new as a replacement folio for @old. @old will
- * be uncharged upon free.
+ * be uncharged upon free. This is only used by the page cache
+ * (in replace_page_cache_folio()).
  *
  * Both folios must be locked, @new->mapping must be set up.
  */
-void mem_cgroup_migrate(struct folio *old, struct folio *new)
+void mem_cgroup_replace_folio(struct folio *old, struct folio *new)
 {
 	struct mem_cgroup *memcg;
 	long nr_pages = folio_nr_pages(new);
@@ -7364,6 +7365,44 @@  void mem_cgroup_migrate(struct folio *old, struct folio *new)
 	local_irq_restore(flags);
 }
 
+/**
+ * mem_cgroup_migrate - Transfer the memcg data from the old to the new folio.
+ * @old: Currently circulating folio.
+ * @new: Replacement folio.
+ *
+ * Transfer the memcg data from the old folio to the new folio for migration.
+ * The old folio's data info will be cleared. Note that the memory counters
+ * will remain unchanged throughout the process.
+ *
+ * Both folios must be locked, @new->mapping must be set up.
+ */
+void mem_cgroup_migrate(struct folio *old, struct folio *new)
+{
+	struct mem_cgroup *memcg;
+
+	VM_BUG_ON_FOLIO(!folio_test_locked(old), old);
+	VM_BUG_ON_FOLIO(!folio_test_locked(new), new);
+	VM_BUG_ON_FOLIO(folio_test_anon(old) != folio_test_anon(new), new);
+	VM_BUG_ON_FOLIO(folio_nr_pages(old) != folio_nr_pages(new), new);
+
+	if (mem_cgroup_disabled())
+		return;
+
+	memcg = folio_memcg(old);
+	/*
+	 * Note that it is normal to see !memcg for a hugetlb folio.
+	 * It could have been allocated when memory_hugetlb_accounting was not
+	 * selected, for e.g.
+	 */
+	VM_WARN_ON_ONCE_FOLIO(!memcg, old);
+	if (!memcg)
+		return;
+
+	/* Transfer the charge and the css ref */
+	commit_charge(new, memcg);
+	old->memcg_data = 0;
+}
+
 DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key);
 EXPORT_SYMBOL(memcg_sockets_enabled_key);
 
diff --git a/mm/migrate.c b/mm/migrate.c
index 7d1804c4a5d9..6034c7ed1d65 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -633,8 +633,7 @@  void folio_migrate_flags(struct folio *newfolio, struct folio *folio)
 
 	folio_copy_owner(newfolio, folio);
 
-	if (!folio_test_hugetlb(folio))
-		mem_cgroup_migrate(folio, newfolio);
+	mem_cgroup_migrate(folio, newfolio);
 }
 EXPORT_SYMBOL(folio_migrate_flags);