[v2,2/2] memcg: dump memory.stat during cgroup OOM for v1

Message ID 20230428132406.2540811-3-yosryahmed@google.com
State New
Headers
Series memcg: OOM log improvements |

Commit Message

Yosry Ahmed April 28, 2023, 1:24 p.m. UTC
  Commit c8713d0b2312 ("mm: memcontrol: dump memory.stat during cgroup
OOM") made sure we dump all the stats in memory.stat during a cgroup
OOM, but it also introduced a slight behavioral change. The code used to
print the non-hierarchical v1 cgroup stats for the entire cgroup
subtree; now it only prints the v2 cgroup stats for the cgroup under
OOM.

For cgroup v1 users, this introduces a few problems:
(a) The non-hierarchical stats of the memcg under OOM are no longer
shown.
(b) A couple of v1-only stats (e.g. pgpgin, pgpgout) are no longer
shown.
(c) We show the list of cgroup v2 stats, even in cgroup v1. This list of
stats is not tracked with v1 in mind. While most of the stats seem to be
working on v1, there may be some stats that are not fully or correctly
tracked.

Although the OOM log format is not set in stone, we should not change it
for no reason. Users upgrading to a kernel that includes commit
c8713d0b2312 ("mm: memcontrol: dump memory.stat during cgroup OOM")
notice these behavioral changes on cgroup v1.

The fix is simple. Commit c8713d0b2312 ("mm: memcontrol: dump memory.stat
during cgroup OOM") separated stats formatting from stats display for
v2, to reuse the stats formatting in the OOM logs. Do the same for v1.

Move the v2 specific formatting from memory_stat_format() to
memcg_stat_format(), add memcg1_stat_format() for v1, and make
memory_stat_format() select between them based on cgroup version.
Since memory_stat_show() now works for both v1 & v2, drop
memcg_stat_show().

Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
---
 mm/memcontrol.c | 60 ++++++++++++++++++++++++++++---------------------
 1 file changed, 35 insertions(+), 25 deletions(-)
  

Comments

Michal Hocko May 3, 2023, 8:50 a.m. UTC | #1
On Fri 28-04-23 13:24:06, Yosry Ahmed wrote:
> Commit c8713d0b2312 ("mm: memcontrol: dump memory.stat during cgroup
> OOM") made sure we dump all the stats in memory.stat during a cgroup
> OOM, but it also introduced a slight behavioral change. The code used to
> print the non-hierarchical v1 cgroup stats for the entire cgroup
> subtree, now it only prints the v2 cgroup stats for the cgroup under
> OOM.
> 
> For cgroup v1 users, this introduces a few problems:
> (a) The non-hierarchical stats of the memcg under OOM are no longer
> shown.
> (b) A couple of v1-only stats (e.g. pgpgin, pgpgout) are no longer
> shown.
> (c) We show the list of cgroup v2 stats, even in cgroup v1. This list of
> stats is not tracked with v1 in mind. While most of the stats seem to be
> working on v1, there may be some stats that are not fully or correctly
> tracked.
> 
> Although OOM log is not set in stone, we should not change it for no
> reason. When upgrading the kernel version to a version including
> commit c8713d0b2312 ("mm: memcontrol: dump memory.stat during cgroup
> OOM"), these behavioral changes are noticed in cgroup v1.
> 
> The fix is simple. Commit c8713d0b2312 ("mm: memcontrol: dump memory.stat
> during cgroup OOM") separated stats formatting from stats display for
> v2, to reuse the stats formatting in the OOM logs. Do the same for v1.
> 
> Move the v2 specific formatting from memory_stat_format() to
> memcg_stat_format(), add memcg1_stat_format() for v1, and make
> memory_stat_format() select between them based on cgroup version.
> Since memory_stat_show() now works for both v1 & v2, drop
> memcg_stat_show().
> 
> Signed-off-by: Yosry Ahmed <yosryahmed@google.com>

Acked-by: Michal Hocko <mhocko@suse.com>
Thanks

> ---
>  mm/memcontrol.c | 60 ++++++++++++++++++++++++++++---------------------
>  1 file changed, 35 insertions(+), 25 deletions(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 5922940f92c9..2b492f8d540c 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1551,7 +1551,7 @@ static inline unsigned long memcg_page_state_output(struct mem_cgroup *memcg,
>  	return memcg_page_state(memcg, item) * memcg_page_state_unit(item);
>  }
>  
> -static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
> +static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
>  {
>  	int i;
>  
> @@ -1604,6 +1604,17 @@ static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
>  	WARN_ON_ONCE(seq_buf_has_overflowed(s));
>  }
>  
> +static void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s);
> +
> +static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
> +{
> +	if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
> +		memcg_stat_format(memcg, s);
> +	else
> +		memcg1_stat_format(memcg, s);
> +	WARN_ON_ONCE(seq_buf_has_overflowed(s));
> +}
> +
>  #define K(x) ((x) << (PAGE_SHIFT-10))
>  /**
>   * mem_cgroup_print_oom_context: Print OOM information relevant to
> @@ -4078,9 +4089,8 @@ static const unsigned int memcg1_events[] = {
>  	PGMAJFAULT,
>  };
>  
> -static int memcg_stat_show(struct seq_file *m, void *v)
> +static void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
>  {
> -	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
>  	unsigned long memory, memsw;
>  	struct mem_cgroup *mi;
>  	unsigned int i;
> @@ -4095,18 +4105,18 @@ static int memcg_stat_show(struct seq_file *m, void *v)
>  		if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
>  			continue;
>  		nr = memcg_page_state_local(memcg, memcg1_stats[i]);
> -		seq_printf(m, "%s %lu\n", memcg1_stat_names[i],
> +		seq_buf_printf(s, "%s %lu\n", memcg1_stat_names[i],
>  			   nr * memcg_page_state_unit(memcg1_stats[i]));
>  	}
>  
>  	for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
> -		seq_printf(m, "%s %lu\n", vm_event_name(memcg1_events[i]),
> -			   memcg_events_local(memcg, memcg1_events[i]));
> +		seq_buf_printf(s, "%s %lu\n", vm_event_name(memcg1_events[i]),
> +			       memcg_events_local(memcg, memcg1_events[i]));
>  
>  	for (i = 0; i < NR_LRU_LISTS; i++)
> -		seq_printf(m, "%s %lu\n", lru_list_name(i),
> -			   memcg_page_state_local(memcg, NR_LRU_BASE + i) *
> -			   PAGE_SIZE);
> +		seq_buf_printf(s, "%s %lu\n", lru_list_name(i),
> +			       memcg_page_state_local(memcg, NR_LRU_BASE + i) *
> +			       PAGE_SIZE);
>  
>  	/* Hierarchical information */
>  	memory = memsw = PAGE_COUNTER_MAX;
> @@ -4114,11 +4124,11 @@ static int memcg_stat_show(struct seq_file *m, void *v)
>  		memory = min(memory, READ_ONCE(mi->memory.max));
>  		memsw = min(memsw, READ_ONCE(mi->memsw.max));
>  	}
> -	seq_printf(m, "hierarchical_memory_limit %llu\n",
> -		   (u64)memory * PAGE_SIZE);
> +	seq_buf_printf(s, "hierarchical_memory_limit %llu\n",
> +		       (u64)memory * PAGE_SIZE);
>  	if (do_memsw_account())
> -		seq_printf(m, "hierarchical_memsw_limit %llu\n",
> -			   (u64)memsw * PAGE_SIZE);
> +		seq_buf_printf(s, "hierarchical_memsw_limit %llu\n",
> +			       (u64)memsw * PAGE_SIZE);
>  
>  	for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
>  		unsigned long nr;
> @@ -4126,19 +4136,19 @@ static int memcg_stat_show(struct seq_file *m, void *v)
>  		if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
>  			continue;
>  		nr = memcg_page_state(memcg, memcg1_stats[i]);
> -		seq_printf(m, "total_%s %llu\n", memcg1_stat_names[i],
> +		seq_buf_printf(s, "total_%s %llu\n", memcg1_stat_names[i],
>  			   (u64)nr * memcg_page_state_unit(memcg1_stats[i]));
>  	}
>  
>  	for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
> -		seq_printf(m, "total_%s %llu\n",
> -			   vm_event_name(memcg1_events[i]),
> -			   (u64)memcg_events(memcg, memcg1_events[i]));
> +		seq_buf_printf(s, "total_%s %llu\n",
> +			       vm_event_name(memcg1_events[i]),
> +			       (u64)memcg_events(memcg, memcg1_events[i]));
>  
>  	for (i = 0; i < NR_LRU_LISTS; i++)
> -		seq_printf(m, "total_%s %llu\n", lru_list_name(i),
> -			   (u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
> -			   PAGE_SIZE);
> +		seq_buf_printf(s, "total_%s %llu\n", lru_list_name(i),
> +			       (u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
> +			       PAGE_SIZE);
>  
>  #ifdef CONFIG_DEBUG_VM
>  	{
> @@ -4153,12 +4163,10 @@ static int memcg_stat_show(struct seq_file *m, void *v)
>  			anon_cost += mz->lruvec.anon_cost;
>  			file_cost += mz->lruvec.file_cost;
>  		}
> -		seq_printf(m, "anon_cost %lu\n", anon_cost);
> -		seq_printf(m, "file_cost %lu\n", file_cost);
> +		seq_buf_printf(s, "anon_cost %lu\n", anon_cost);
> +		seq_buf_printf(s, "file_cost %lu\n", file_cost);
>  	}
>  #endif
> -
> -	return 0;
>  }
>  
>  static u64 mem_cgroup_swappiness_read(struct cgroup_subsys_state *css,
> @@ -4998,6 +5006,8 @@ static int mem_cgroup_slab_show(struct seq_file *m, void *p)
>  }
>  #endif
>  
> +static int memory_stat_show(struct seq_file *m, void *v);
> +
>  static struct cftype mem_cgroup_legacy_files[] = {
>  	{
>  		.name = "usage_in_bytes",
> @@ -5030,7 +5040,7 @@ static struct cftype mem_cgroup_legacy_files[] = {
>  	},
>  	{
>  		.name = "stat",
> -		.seq_show = memcg_stat_show,
> +		.seq_show = memory_stat_show,
>  	},
>  	{
>  		.name = "force_empty",
> -- 
> 2.40.1.495.gc816e09b53d-goog
  
Yosry Ahmed May 3, 2023, 8:52 a.m. UTC | #2
On Wed, May 3, 2023 at 1:50 AM Michal Hocko <mhocko@suse.com> wrote:
>
> On Fri 28-04-23 13:24:06, Yosry Ahmed wrote:
> > Commit c8713d0b2312 ("mm: memcontrol: dump memory.stat during cgroup
> > OOM") made sure we dump all the stats in memory.stat during a cgroup
> > OOM, but it also introduced a slight behavioral change. The code used to
> > print the non-hierarchical v1 cgroup stats for the entire cgroup
> > subtree, now it only prints the v2 cgroup stats for the cgroup under
> > OOM.
> >
> > For cgroup v1 users, this introduces a few problems:
> > (a) The non-hierarchical stats of the memcg under OOM are no longer
> > shown.
> > (b) A couple of v1-only stats (e.g. pgpgin, pgpgout) are no longer
> > shown.
> > (c) We show the list of cgroup v2 stats, even in cgroup v1. This list of
> > stats is not tracked with v1 in mind. While most of the stats seem to be
> > working on v1, there may be some stats that are not fully or correctly
> > tracked.
> >
> > Although OOM log is not set in stone, we should not change it for no
> > reason. When upgrading the kernel version to a version including
> > commit c8713d0b2312 ("mm: memcontrol: dump memory.stat during cgroup
> > OOM"), these behavioral changes are noticed in cgroup v1.
> >
> > The fix is simple. Commit c8713d0b2312 ("mm: memcontrol: dump memory.stat
> > during cgroup OOM") separated stats formatting from stats display for
> > v2, to reuse the stats formatting in the OOM logs. Do the same for v1.
> >
> > Move the v2 specific formatting from memory_stat_format() to
> > memcg_stat_format(), add memcg1_stat_format() for v1, and make
> > memory_stat_format() select between them based on cgroup version.
> > Since memory_stat_show() now works for both v1 & v2, drop
> > memcg_stat_show().
> >
> > Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
>
> Acked-by: Michal Hocko <mhocko@suse.com>
> Thanks

Thanks Michal!

>
> > ---
> >  mm/memcontrol.c | 60 ++++++++++++++++++++++++++++---------------------
> >  1 file changed, 35 insertions(+), 25 deletions(-)
> >
> > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > index 5922940f92c9..2b492f8d540c 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -1551,7 +1551,7 @@ static inline unsigned long memcg_page_state_output(struct mem_cgroup *memcg,
> >       return memcg_page_state(memcg, item) * memcg_page_state_unit(item);
> >  }
> >
> > -static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
> > +static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
> >  {
> >       int i;
> >
> > @@ -1604,6 +1604,17 @@ static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
> >       WARN_ON_ONCE(seq_buf_has_overflowed(s));
> >  }
> >
> > +static void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s);
> > +
> > +static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
> > +{
> > +     if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
> > +             memcg_stat_format(memcg, s);
> > +     else
> > +             memcg1_stat_format(memcg, s);
> > +     WARN_ON_ONCE(seq_buf_has_overflowed(s));
> > +}
> > +
> >  #define K(x) ((x) << (PAGE_SHIFT-10))
> >  /**
> >   * mem_cgroup_print_oom_context: Print OOM information relevant to
> > @@ -4078,9 +4089,8 @@ static const unsigned int memcg1_events[] = {
> >       PGMAJFAULT,
> >  };
> >
> > -static int memcg_stat_show(struct seq_file *m, void *v)
> > +static void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
> >  {
> > -     struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
> >       unsigned long memory, memsw;
> >       struct mem_cgroup *mi;
> >       unsigned int i;
> > @@ -4095,18 +4105,18 @@ static int memcg_stat_show(struct seq_file *m, void *v)
> >               if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
> >                       continue;
> >               nr = memcg_page_state_local(memcg, memcg1_stats[i]);
> > -             seq_printf(m, "%s %lu\n", memcg1_stat_names[i],
> > +             seq_buf_printf(s, "%s %lu\n", memcg1_stat_names[i],
> >                          nr * memcg_page_state_unit(memcg1_stats[i]));
> >       }
> >
> >       for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
> > -             seq_printf(m, "%s %lu\n", vm_event_name(memcg1_events[i]),
> > -                        memcg_events_local(memcg, memcg1_events[i]));
> > +             seq_buf_printf(s, "%s %lu\n", vm_event_name(memcg1_events[i]),
> > +                            memcg_events_local(memcg, memcg1_events[i]));
> >
> >       for (i = 0; i < NR_LRU_LISTS; i++)
> > -             seq_printf(m, "%s %lu\n", lru_list_name(i),
> > -                        memcg_page_state_local(memcg, NR_LRU_BASE + i) *
> > -                        PAGE_SIZE);
> > +             seq_buf_printf(s, "%s %lu\n", lru_list_name(i),
> > +                            memcg_page_state_local(memcg, NR_LRU_BASE + i) *
> > +                            PAGE_SIZE);
> >
> >       /* Hierarchical information */
> >       memory = memsw = PAGE_COUNTER_MAX;
> > @@ -4114,11 +4124,11 @@ static int memcg_stat_show(struct seq_file *m, void *v)
> >               memory = min(memory, READ_ONCE(mi->memory.max));
> >               memsw = min(memsw, READ_ONCE(mi->memsw.max));
> >       }
> > -     seq_printf(m, "hierarchical_memory_limit %llu\n",
> > -                (u64)memory * PAGE_SIZE);
> > +     seq_buf_printf(s, "hierarchical_memory_limit %llu\n",
> > +                    (u64)memory * PAGE_SIZE);
> >       if (do_memsw_account())
> > -             seq_printf(m, "hierarchical_memsw_limit %llu\n",
> > -                        (u64)memsw * PAGE_SIZE);
> > +             seq_buf_printf(s, "hierarchical_memsw_limit %llu\n",
> > +                            (u64)memsw * PAGE_SIZE);
> >
> >       for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
> >               unsigned long nr;
> > @@ -4126,19 +4136,19 @@ static int memcg_stat_show(struct seq_file *m, void *v)
> >               if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
> >                       continue;
> >               nr = memcg_page_state(memcg, memcg1_stats[i]);
> > -             seq_printf(m, "total_%s %llu\n", memcg1_stat_names[i],
> > +             seq_buf_printf(s, "total_%s %llu\n", memcg1_stat_names[i],
> >                          (u64)nr * memcg_page_state_unit(memcg1_stats[i]));
> >       }
> >
> >       for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
> > -             seq_printf(m, "total_%s %llu\n",
> > -                        vm_event_name(memcg1_events[i]),
> > -                        (u64)memcg_events(memcg, memcg1_events[i]));
> > +             seq_buf_printf(s, "total_%s %llu\n",
> > +                            vm_event_name(memcg1_events[i]),
> > +                            (u64)memcg_events(memcg, memcg1_events[i]));
> >
> >       for (i = 0; i < NR_LRU_LISTS; i++)
> > -             seq_printf(m, "total_%s %llu\n", lru_list_name(i),
> > -                        (u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
> > -                        PAGE_SIZE);
> > +             seq_buf_printf(s, "total_%s %llu\n", lru_list_name(i),
> > +                            (u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
> > +                            PAGE_SIZE);
> >
> >  #ifdef CONFIG_DEBUG_VM
> >       {
> > @@ -4153,12 +4163,10 @@ static int memcg_stat_show(struct seq_file *m, void *v)
> >                       anon_cost += mz->lruvec.anon_cost;
> >                       file_cost += mz->lruvec.file_cost;
> >               }
> > -             seq_printf(m, "anon_cost %lu\n", anon_cost);
> > -             seq_printf(m, "file_cost %lu\n", file_cost);
> > +             seq_buf_printf(s, "anon_cost %lu\n", anon_cost);
> > +             seq_buf_printf(s, "file_cost %lu\n", file_cost);
> >       }
> >  #endif
> > -
> > -     return 0;
> >  }
> >
> >  static u64 mem_cgroup_swappiness_read(struct cgroup_subsys_state *css,
> > @@ -4998,6 +5006,8 @@ static int mem_cgroup_slab_show(struct seq_file *m, void *p)
> >  }
> >  #endif
> >
> > +static int memory_stat_show(struct seq_file *m, void *v);
> > +
> >  static struct cftype mem_cgroup_legacy_files[] = {
> >       {
> >               .name = "usage_in_bytes",
> > @@ -5030,7 +5040,7 @@ static struct cftype mem_cgroup_legacy_files[] = {
> >       },
> >       {
> >               .name = "stat",
> > -             .seq_show = memcg_stat_show,
> > +             .seq_show = memory_stat_show,
> >       },
> >       {
> >               .name = "force_empty",
> > --
> > 2.40.1.495.gc816e09b53d-goog
>
> --
> Michal Hocko
> SUSE Labs
  
Shakeel Butt May 3, 2023, 6:04 p.m. UTC | #3
On Fri, Apr 28, 2023 at 01:24:06PM +0000, Yosry Ahmed wrote:
> Commit c8713d0b2312 ("mm: memcontrol: dump memory.stat during cgroup
> OOM") made sure we dump all the stats in memory.stat during a cgroup
> OOM, but it also introduced a slight behavioral change. The code used to
> print the non-hierarchical v1 cgroup stats for the entire cgroup
> subtree, now it only prints the v2 cgroup stats for the cgroup under
> OOM.
> 
> For cgroup v1 users, this introduces a few problems:
> (a) The non-hierarchical stats of the memcg under OOM are no longer
> shown.
> (b) A couple of v1-only stats (e.g. pgpgin, pgpgout) are no longer
> shown.
> (c) We show the list of cgroup v2 stats, even in cgroup v1. This list of
> stats is not tracked with v1 in mind. While most of the stats seem to be
> working on v1, there may be some stats that are not fully or correctly
> tracked.
> 
> Although OOM log is not set in stone, we should not change it for no
> reason. When upgrading the kernel version to a version including
> commit c8713d0b2312 ("mm: memcontrol: dump memory.stat during cgroup
> OOM"), these behavioral changes are noticed in cgroup v1.
> 
> The fix is simple. Commit c8713d0b2312 ("mm: memcontrol: dump memory.stat
> during cgroup OOM") separated stats formatting from stats display for
> v2, to reuse the stats formatting in the OOM logs. Do the same for v1.
> 
> Move the v2 specific formatting from memory_stat_format() to
> memcg_stat_format(), add memcg1_stat_format() for v1, and make
> memory_stat_format() select between them based on cgroup version.
> Since memory_stat_show() now works for both v1 & v2, drop
> memcg_stat_show().
> 
> Signed-off-by: Yosry Ahmed <yosryahmed@google.com>

Acked-by: Shakeel Butt <shakeelb@google.com>
  

Patch

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 5922940f92c9..2b492f8d540c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1551,7 +1551,7 @@  static inline unsigned long memcg_page_state_output(struct mem_cgroup *memcg,
 	return memcg_page_state(memcg, item) * memcg_page_state_unit(item);
 }
 
-static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
+static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
 {
 	int i;
 
@@ -1604,6 +1604,17 @@  static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
 	WARN_ON_ONCE(seq_buf_has_overflowed(s));
 }
 
+static void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s);
+
+static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
+{
+	if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
+		memcg_stat_format(memcg, s);
+	else
+		memcg1_stat_format(memcg, s);
+	WARN_ON_ONCE(seq_buf_has_overflowed(s));
+}
+
 #define K(x) ((x) << (PAGE_SHIFT-10))
 /**
  * mem_cgroup_print_oom_context: Print OOM information relevant to
@@ -4078,9 +4089,8 @@  static const unsigned int memcg1_events[] = {
 	PGMAJFAULT,
 };
 
-static int memcg_stat_show(struct seq_file *m, void *v)
+static void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
 	unsigned long memory, memsw;
 	struct mem_cgroup *mi;
 	unsigned int i;
@@ -4095,18 +4105,18 @@  static int memcg_stat_show(struct seq_file *m, void *v)
 		if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
 			continue;
 		nr = memcg_page_state_local(memcg, memcg1_stats[i]);
-		seq_printf(m, "%s %lu\n", memcg1_stat_names[i],
+		seq_buf_printf(s, "%s %lu\n", memcg1_stat_names[i],
 			   nr * memcg_page_state_unit(memcg1_stats[i]));
 	}
 
 	for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
-		seq_printf(m, "%s %lu\n", vm_event_name(memcg1_events[i]),
-			   memcg_events_local(memcg, memcg1_events[i]));
+		seq_buf_printf(s, "%s %lu\n", vm_event_name(memcg1_events[i]),
+			       memcg_events_local(memcg, memcg1_events[i]));
 
 	for (i = 0; i < NR_LRU_LISTS; i++)
-		seq_printf(m, "%s %lu\n", lru_list_name(i),
-			   memcg_page_state_local(memcg, NR_LRU_BASE + i) *
-			   PAGE_SIZE);
+		seq_buf_printf(s, "%s %lu\n", lru_list_name(i),
+			       memcg_page_state_local(memcg, NR_LRU_BASE + i) *
+			       PAGE_SIZE);
 
 	/* Hierarchical information */
 	memory = memsw = PAGE_COUNTER_MAX;
@@ -4114,11 +4124,11 @@  static int memcg_stat_show(struct seq_file *m, void *v)
 		memory = min(memory, READ_ONCE(mi->memory.max));
 		memsw = min(memsw, READ_ONCE(mi->memsw.max));
 	}
-	seq_printf(m, "hierarchical_memory_limit %llu\n",
-		   (u64)memory * PAGE_SIZE);
+	seq_buf_printf(s, "hierarchical_memory_limit %llu\n",
+		       (u64)memory * PAGE_SIZE);
 	if (do_memsw_account())
-		seq_printf(m, "hierarchical_memsw_limit %llu\n",
-			   (u64)memsw * PAGE_SIZE);
+		seq_buf_printf(s, "hierarchical_memsw_limit %llu\n",
+			       (u64)memsw * PAGE_SIZE);
 
 	for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
 		unsigned long nr;
@@ -4126,19 +4136,19 @@  static int memcg_stat_show(struct seq_file *m, void *v)
 		if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
 			continue;
 		nr = memcg_page_state(memcg, memcg1_stats[i]);
-		seq_printf(m, "total_%s %llu\n", memcg1_stat_names[i],
+		seq_buf_printf(s, "total_%s %llu\n", memcg1_stat_names[i],
 			   (u64)nr * memcg_page_state_unit(memcg1_stats[i]));
 	}
 
 	for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
-		seq_printf(m, "total_%s %llu\n",
-			   vm_event_name(memcg1_events[i]),
-			   (u64)memcg_events(memcg, memcg1_events[i]));
+		seq_buf_printf(s, "total_%s %llu\n",
+			       vm_event_name(memcg1_events[i]),
+			       (u64)memcg_events(memcg, memcg1_events[i]));
 
 	for (i = 0; i < NR_LRU_LISTS; i++)
-		seq_printf(m, "total_%s %llu\n", lru_list_name(i),
-			   (u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
-			   PAGE_SIZE);
+		seq_buf_printf(s, "total_%s %llu\n", lru_list_name(i),
+			       (u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
+			       PAGE_SIZE);
 
 #ifdef CONFIG_DEBUG_VM
 	{
@@ -4153,12 +4163,10 @@  static int memcg_stat_show(struct seq_file *m, void *v)
 			anon_cost += mz->lruvec.anon_cost;
 			file_cost += mz->lruvec.file_cost;
 		}
-		seq_printf(m, "anon_cost %lu\n", anon_cost);
-		seq_printf(m, "file_cost %lu\n", file_cost);
+		seq_buf_printf(s, "anon_cost %lu\n", anon_cost);
+		seq_buf_printf(s, "file_cost %lu\n", file_cost);
 	}
 #endif
-
-	return 0;
 }
 
 static u64 mem_cgroup_swappiness_read(struct cgroup_subsys_state *css,
@@ -4998,6 +5006,8 @@  static int mem_cgroup_slab_show(struct seq_file *m, void *p)
 }
 #endif
 
+static int memory_stat_show(struct seq_file *m, void *v);
+
 static struct cftype mem_cgroup_legacy_files[] = {
 	{
 		.name = "usage_in_bytes",
@@ -5030,7 +5040,7 @@  static struct cftype mem_cgroup_legacy_files[] = {
 	},
 	{
 		.name = "stat",
-		.seq_show = memcg_stat_show,
+		.seq_show = memory_stat_show,
 	},
 	{
 		.name = "force_empty",