[v3,3/5] mm: memcg: add per-memcg zswap writeback stat

Message ID 20231017232152.2605440-4-nphamcs@gmail.com
State New
Headers
Series workload-specific and memory pressure-driven zswap writeback |

Commit Message

Nhat Pham Oct. 17, 2023, 11:21 p.m. UTC
  From: Domenico Cerasuolo <cerasuolodomenico@gmail.com>

Since zswap now writes back pages from memcg-specific LRUs, we now need a
new stat to show writebacks count for each memcg.

Suggested-by: Nhat Pham <nphamcs@gmail.com>
Signed-off-by: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
Signed-off-by: Nhat Pham <nphamcs@gmail.com>
---
 include/linux/memcontrol.h |  2 ++
 mm/memcontrol.c            | 15 +++++++++++++++
 mm/zswap.c                 |  3 +++
 3 files changed, 20 insertions(+)
  

Comments

Nhat Pham Oct. 17, 2023, 11:35 p.m. UTC | #1
On Tue, Oct 17, 2023 at 4:21 PM Nhat Pham <nphamcs@gmail.com> wrote:
>
> From: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
>
> Since zswap now writes back pages from memcg-specific LRUs, we now need a
> new stat to show writebacks count for each memcg.
>
> Suggested-by: Nhat Pham <nphamcs@gmail.com>
> Signed-off-by: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
> Signed-off-by: Nhat Pham <nphamcs@gmail.com>

/s/Signed-off/Acked
This is Domenico's work :) I used the wrong tag here. Should be:
Acked-by: Nhat Pham <nphamcs@gmail.com>

> ---
>  include/linux/memcontrol.h |  2 ++
>  mm/memcontrol.c            | 15 +++++++++++++++
>  mm/zswap.c                 |  3 +++
>  3 files changed, 20 insertions(+)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 3de10fabea0f..7868b1e00bf5 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -38,6 +38,7 @@ enum memcg_stat_item {
>         MEMCG_KMEM,
>         MEMCG_ZSWAP_B,
>         MEMCG_ZSWAPPED,
> +       MEMCG_ZSWAP_WB,
>         MEMCG_NR_STAT,
>  };
>
> @@ -1884,6 +1885,7 @@ static inline void count_objcg_event(struct obj_cgroup *objcg,
>  bool obj_cgroup_may_zswap(struct obj_cgroup *objcg);
>  void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size);
>  void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size);
> +void obj_cgroup_report_zswap_wb(struct obj_cgroup *objcg);
>  #else
>  static inline bool obj_cgroup_may_zswap(struct obj_cgroup *objcg)
>  {
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 1bde67b29287..a9118871e5a6 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1505,6 +1505,7 @@ static const struct memory_stat memory_stats[] = {
>  #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP)
>         { "zswap",                      MEMCG_ZSWAP_B                   },
>         { "zswapped",                   MEMCG_ZSWAPPED                  },
> +       { "zswap_wb",                   MEMCG_ZSWAP_WB                  },
>  #endif
>         { "file_mapped",                NR_FILE_MAPPED                  },
>         { "file_dirty",                 NR_FILE_DIRTY                   },
> @@ -1541,6 +1542,7 @@ static int memcg_page_state_unit(int item)
>         switch (item) {
>         case MEMCG_PERCPU_B:
>         case MEMCG_ZSWAP_B:
> +       case MEMCG_ZSWAP_WB:
>         case NR_SLAB_RECLAIMABLE_B:
>         case NR_SLAB_UNRECLAIMABLE_B:
>         case WORKINGSET_REFAULT_ANON:
> @@ -7861,6 +7863,19 @@ void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size)
>         rcu_read_unlock();
>  }
>
> +void obj_cgroup_report_zswap_wb(struct obj_cgroup *objcg)
> +{
> +       struct mem_cgroup *memcg;
> +
> +       if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
> +               return;
> +
> +       rcu_read_lock();
> +       memcg = obj_cgroup_memcg(objcg);
> +       mod_memcg_state(memcg, MEMCG_ZSWAP_WB, 1);
> +       rcu_read_unlock();
> +}
> +
>  static u64 zswap_current_read(struct cgroup_subsys_state *css,
>                               struct cftype *cft)
>  {
> diff --git a/mm/zswap.c b/mm/zswap.c
> index d2989ad11814..15485427e3fa 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -704,6 +704,9 @@ static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_o
>         }
>         zswap_written_back_pages++;
>
> +       if (entry->objcg)
> +               obj_cgroup_report_zswap_wb(entry->objcg);
> +
>         /*
>          * Writeback started successfully, the page now belongs to the
>          * swapcache. Drop the entry from zswap - unless invalidate already
> --
> 2.34.1
  
Jeff Johnson Oct. 17, 2023, 11:37 p.m. UTC | #2
On 10/17/2023 4:35 PM, Nhat Pham wrote:
> On Tue, Oct 17, 2023 at 4:21 PM Nhat Pham <nphamcs@gmail.com> wrote:
>>
>> From: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
>>
>> Since zswap now writes back pages from memcg-specific LRUs, we now need a
>> new stat to show writebacks count for each memcg.
>>
>> Suggested-by: Nhat Pham <nphamcs@gmail.com>
>> Signed-off-by: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
>> Signed-off-by: Nhat Pham <nphamcs@gmail.com>
> 
> /s/Signed-off/Acked
> This is Domenico's work :) I used the wrong tag here. Should be:
> Acked-by: Nhat Pham <nphamcs@gmail.com>

no, since you are posting the patch, you have to sign off on it.
Signed-off-by is correct
  
Nhat Pham Oct. 17, 2023, 11:40 p.m. UTC | #3
On Tue, Oct 17, 2023 at 4:38 PM Jeff Johnson <quic_jjohnson@quicinc.com> wrote:
>
> On 10/17/2023 4:35 PM, Nhat Pham wrote:
> > On Tue, Oct 17, 2023 at 4:21 PM Nhat Pham <nphamcs@gmail.com> wrote:
> >>
> >> From: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
> >>
> >> Since zswap now writes back pages from memcg-specific LRUs, we now need a
> >> new stat to show writebacks count for each memcg.
> >>
> >> Suggested-by: Nhat Pham <nphamcs@gmail.com>
> >> Signed-off-by: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
> >> Signed-off-by: Nhat Pham <nphamcs@gmail.com>
> >
> > /s/Signed-off/Acked
> > This is Domenico's work :) I used the wrong tag here. Should be:
> > Acked-by: Nhat Pham <nphamcs@gmail.com>
>
> no, since you are posting the patch, you have to sign off on it.
> Signed-off-by is correct
>

Ah so past Nhat was right all along. Ignore my comments then.
Thanks for letting me know, Jeff!
  
Yosry Ahmed Oct. 18, 2023, 11:24 p.m. UTC | #4
On Tue, Oct 17, 2023 at 4:21 PM Nhat Pham <nphamcs@gmail.com> wrote:
>
> From: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
>
> Since zswap now writes back pages from memcg-specific LRUs, we now need a
> new stat to show writebacks count for each memcg.
>
> Suggested-by: Nhat Pham <nphamcs@gmail.com>
> Signed-off-by: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
> Signed-off-by: Nhat Pham <nphamcs@gmail.com>
> ---
>  include/linux/memcontrol.h |  2 ++
>  mm/memcontrol.c            | 15 +++++++++++++++
>  mm/zswap.c                 |  3 +++
>  3 files changed, 20 insertions(+)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 3de10fabea0f..7868b1e00bf5 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -38,6 +38,7 @@ enum memcg_stat_item {
>         MEMCG_KMEM,
>         MEMCG_ZSWAP_B,
>         MEMCG_ZSWAPPED,
> +       MEMCG_ZSWAP_WB,
>         MEMCG_NR_STAT,
>  };
>
> @@ -1884,6 +1885,7 @@ static inline void count_objcg_event(struct obj_cgroup *objcg,
>  bool obj_cgroup_may_zswap(struct obj_cgroup *objcg);
>  void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size);
>  void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size);
> +void obj_cgroup_report_zswap_wb(struct obj_cgroup *objcg);
>  #else
>  static inline bool obj_cgroup_may_zswap(struct obj_cgroup *objcg)
>  {
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 1bde67b29287..a9118871e5a6 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1505,6 +1505,7 @@ static const struct memory_stat memory_stats[] = {
>  #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP)
>         { "zswap",                      MEMCG_ZSWAP_B                   },
>         { "zswapped",                   MEMCG_ZSWAPPED                  },
> +       { "zswap_wb",                   MEMCG_ZSWAP_WB                  },

zswap_writeback would be more consistent with file_writeback below.

Taking a step back, this is not really a "state". We increment it by 1
every time and never decrement it. Sounds awfully similar to events :)

You can also use count_objcg_event() directly and avoid the need for
obj_cgroup_report_zswap_wb() below.

>  #endif
>         { "file_mapped",                NR_FILE_MAPPED                  },
>         { "file_dirty",                 NR_FILE_DIRTY                   },
> @@ -1541,6 +1542,7 @@ static int memcg_page_state_unit(int item)
>         switch (item) {
>         case MEMCG_PERCPU_B:
>         case MEMCG_ZSWAP_B:
> +       case MEMCG_ZSWAP_WB:
>         case NR_SLAB_RECLAIMABLE_B:
>         case NR_SLAB_UNRECLAIMABLE_B:
>         case WORKINGSET_REFAULT_ANON:
> @@ -7861,6 +7863,19 @@ void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size)
>         rcu_read_unlock();
>  }
>
> +void obj_cgroup_report_zswap_wb(struct obj_cgroup *objcg)
> +{
> +       struct mem_cgroup *memcg;
> +
> +       if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
> +               return;
> +
> +       rcu_read_lock();
> +       memcg = obj_cgroup_memcg(objcg);
> +       mod_memcg_state(memcg, MEMCG_ZSWAP_WB, 1);
> +       rcu_read_unlock();
> +}
> +
>  static u64 zswap_current_read(struct cgroup_subsys_state *css,
>                               struct cftype *cft)
>  {
> diff --git a/mm/zswap.c b/mm/zswap.c
> index d2989ad11814..15485427e3fa 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -704,6 +704,9 @@ static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_o
>         }
>         zswap_written_back_pages++;
>
> +       if (entry->objcg)
> +               obj_cgroup_report_zswap_wb(entry->objcg);
> +
>         /*
>          * Writeback started successfully, the page now belongs to the
>          * swapcache. Drop the entry from zswap - unless invalidate already
> --
> 2.34.1
  
Nhat Pham Oct. 18, 2023, 11:50 p.m. UTC | #5
On Wed, Oct 18, 2023 at 4:25 PM Yosry Ahmed <yosryahmed@google.com> wrote:
>
> On Tue, Oct 17, 2023 at 4:21 PM Nhat Pham <nphamcs@gmail.com> wrote:
> >
> > From: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
> >
> > Since zswap now writes back pages from memcg-specific LRUs, we now need a
> > new stat to show writebacks count for each memcg.
> >
> > Suggested-by: Nhat Pham <nphamcs@gmail.com>
> > Signed-off-by: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
> > Signed-off-by: Nhat Pham <nphamcs@gmail.com>
> > ---
> >  include/linux/memcontrol.h |  2 ++
> >  mm/memcontrol.c            | 15 +++++++++++++++
> >  mm/zswap.c                 |  3 +++
> >  3 files changed, 20 insertions(+)
> >
> > diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> > index 3de10fabea0f..7868b1e00bf5 100644
> > --- a/include/linux/memcontrol.h
> > +++ b/include/linux/memcontrol.h
> > @@ -38,6 +38,7 @@ enum memcg_stat_item {
> >         MEMCG_KMEM,
> >         MEMCG_ZSWAP_B,
> >         MEMCG_ZSWAPPED,
> > +       MEMCG_ZSWAP_WB,
> >         MEMCG_NR_STAT,
> >  };
> >
> > @@ -1884,6 +1885,7 @@ static inline void count_objcg_event(struct obj_cgroup *objcg,
> >  bool obj_cgroup_may_zswap(struct obj_cgroup *objcg);
> >  void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size);
> >  void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size);
> > +void obj_cgroup_report_zswap_wb(struct obj_cgroup *objcg);
> >  #else
> >  static inline bool obj_cgroup_may_zswap(struct obj_cgroup *objcg)
> >  {
> > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > index 1bde67b29287..a9118871e5a6 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -1505,6 +1505,7 @@ static const struct memory_stat memory_stats[] = {
> >  #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP)
> >         { "zswap",                      MEMCG_ZSWAP_B                   },
> >         { "zswapped",                   MEMCG_ZSWAPPED                  },
> > +       { "zswap_wb",                   MEMCG_ZSWAP_WB                  },
>
> zswap_writeback would be more consistent with file_writeback below.
>
> Taking a step back, this is not really a "state". We increment it by 1
> every time and never decrement it. Sounds awfully similar to events :)

Ah yeah, this is probably closer to zswpin/zswpout counters :)
We can probably re-use that piece of logic.

>
> You can also use count_objcg_event() directly and avoid the need for
> obj_cgroup_report_zswap_wb() below.
>
> >  #endif
> >         { "file_mapped",                NR_FILE_MAPPED                  },
> >         { "file_dirty",                 NR_FILE_DIRTY                   },
> > @@ -1541,6 +1542,7 @@ static int memcg_page_state_unit(int item)
> >         switch (item) {
> >         case MEMCG_PERCPU_B:
> >         case MEMCG_ZSWAP_B:
> > +       case MEMCG_ZSWAP_WB:
> >         case NR_SLAB_RECLAIMABLE_B:
> >         case NR_SLAB_UNRECLAIMABLE_B:
> >         case WORKINGSET_REFAULT_ANON:
> > @@ -7861,6 +7863,19 @@ void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size)
> >         rcu_read_unlock();
> >  }
> >
> > +void obj_cgroup_report_zswap_wb(struct obj_cgroup *objcg)
> > +{
> > +       struct mem_cgroup *memcg;
> > +
> > +       if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
> > +               return;
> > +
> > +       rcu_read_lock();
> > +       memcg = obj_cgroup_memcg(objcg);
> > +       mod_memcg_state(memcg, MEMCG_ZSWAP_WB, 1);
> > +       rcu_read_unlock();
> > +}
> > +
> >  static u64 zswap_current_read(struct cgroup_subsys_state *css,
> >                               struct cftype *cft)
> >  {
> > diff --git a/mm/zswap.c b/mm/zswap.c
> > index d2989ad11814..15485427e3fa 100644
> > --- a/mm/zswap.c
> > +++ b/mm/zswap.c
> > @@ -704,6 +704,9 @@ static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_o
> >         }
> >         zswap_written_back_pages++;
> >
> > +       if (entry->objcg)
> > +               obj_cgroup_report_zswap_wb(entry->objcg);
> > +
> >         /*
> >          * Writeback started successfully, the page now belongs to the
> >          * swapcache. Drop the entry from zswap - unless invalidate already
> > --
> > 2.34.1
  

Patch

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 3de10fabea0f..7868b1e00bf5 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -38,6 +38,7 @@  enum memcg_stat_item {
 	MEMCG_KMEM,
 	MEMCG_ZSWAP_B,
 	MEMCG_ZSWAPPED,
+	MEMCG_ZSWAP_WB,
 	MEMCG_NR_STAT,
 };
 
@@ -1884,6 +1885,7 @@  static inline void count_objcg_event(struct obj_cgroup *objcg,
 bool obj_cgroup_may_zswap(struct obj_cgroup *objcg);
 void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size);
 void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size);
+void obj_cgroup_report_zswap_wb(struct obj_cgroup *objcg);
 #else
 static inline bool obj_cgroup_may_zswap(struct obj_cgroup *objcg)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1bde67b29287..a9118871e5a6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1505,6 +1505,7 @@  static const struct memory_stat memory_stats[] = {
 #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP)
 	{ "zswap",			MEMCG_ZSWAP_B			},
 	{ "zswapped",			MEMCG_ZSWAPPED			},
+	{ "zswap_wb",			MEMCG_ZSWAP_WB			},
 #endif
 	{ "file_mapped",		NR_FILE_MAPPED			},
 	{ "file_dirty",			NR_FILE_DIRTY			},
@@ -1541,6 +1542,7 @@  static int memcg_page_state_unit(int item)
 	switch (item) {
 	case MEMCG_PERCPU_B:
 	case MEMCG_ZSWAP_B:
+	case MEMCG_ZSWAP_WB:
 	case NR_SLAB_RECLAIMABLE_B:
 	case NR_SLAB_UNRECLAIMABLE_B:
 	case WORKINGSET_REFAULT_ANON:
@@ -7861,6 +7863,19 @@  void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size)
 	rcu_read_unlock();
 }
 
+void obj_cgroup_report_zswap_wb(struct obj_cgroup *objcg)
+{
+	struct mem_cgroup *memcg;
+
+	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
+		return;
+
+	rcu_read_lock();
+	memcg = obj_cgroup_memcg(objcg);
+	mod_memcg_state(memcg, MEMCG_ZSWAP_WB, 1);
+	rcu_read_unlock();
+}
+
 static u64 zswap_current_read(struct cgroup_subsys_state *css,
 			      struct cftype *cft)
 {
diff --git a/mm/zswap.c b/mm/zswap.c
index d2989ad11814..15485427e3fa 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -704,6 +704,9 @@  static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_o
 	}
 	zswap_written_back_pages++;
 
+	if (entry->objcg)
+		obj_cgroup_report_zswap_wb(entry->objcg);
+
 	/*
 	 * Writeback started successfully, the page now belongs to the
 	 * swapcache. Drop the entry from zswap - unless invalidate already