[RFC,7/8] memcg: add sysctl and config option to control memory recharging
Commit Message
Add a sysctl to enable/disable memory recharging for offline memcgs. Add
a config option to control whether or not it is enabled by default.
Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
---
include/linux/memcontrol.h | 2 ++
kernel/sysctl.c | 11 +++++++++++
mm/Kconfig | 12 ++++++++++++
mm/memcontrol.c | 9 ++++++++-
4 files changed, 33 insertions(+), 1 deletion(-)
Comments
On Thu, Jul 20, 2023 at 07:08:24AM +0000, Yosry Ahmed wrote:
> Add a sysctl to enable/disable memory recharging for offline memcgs. Add
> a config option to control whether or not it is enabled by default.
>
> Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
> ---
> include/linux/memcontrol.h | 2 ++
> kernel/sysctl.c | 11 +++++++++++
> mm/Kconfig | 12 ++++++++++++
> mm/memcontrol.c | 9 ++++++++-
> 4 files changed, 33 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 59b653d4a76e..ae9f09ee90cb 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -60,6 +60,8 @@ struct mem_cgroup_reclaim_cookie {
>
> #ifdef CONFIG_MEMCG
>
> +extern int sysctl_recharge_offline_memcgs;
> +
> #define MEM_CGROUP_ID_SHIFT 16
> #define MEM_CGROUP_ID_MAX USHRT_MAX
>
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 354a2d294f52..1735d1d95652 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -2249,6 +2249,17 @@ static struct ctl_table vm_table[] = {
> .extra2 = (void *)&mmap_rnd_compat_bits_max,
> },
> #endif
> +#ifdef CONFIG_MEMCG
> + {
> + .procname = "recharge_offline_memcgs",
> + .data = &sysctl_recharge_offline_memcgs,
> + .maxlen = sizeof(sysctl_recharge_offline_memcgs),
> + .mode = 0644,
> + .proc_handler = proc_dointvec_minmax,
> + .extra1 = SYSCTL_ZERO,
> + .extra2 = SYSCTL_ONE,
> + },
> +#endif /* CONFIG_MEMCG */
> { }
> };
Please don't add any more sysctls to kernel/sysctl.c, git log that file
for a series of cleanups which show how to use your own and why we have
been doing that cleanup.
Luis
On Thu, Jul 20, 2023 at 11:13 AM Luis Chamberlain <mcgrof@kernel.org> wrote:
>
> On Thu, Jul 20, 2023 at 07:08:24AM +0000, Yosry Ahmed wrote:
> > Add a sysctl to enable/disable memory recharging for offline memcgs. Add
> > a config option to control whether or not it is enabled by default.
> >
> > Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
> > ---
> > include/linux/memcontrol.h | 2 ++
> > kernel/sysctl.c | 11 +++++++++++
> > mm/Kconfig | 12 ++++++++++++
> > mm/memcontrol.c | 9 ++++++++-
> > 4 files changed, 33 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> > index 59b653d4a76e..ae9f09ee90cb 100644
> > --- a/include/linux/memcontrol.h
> > +++ b/include/linux/memcontrol.h
> > @@ -60,6 +60,8 @@ struct mem_cgroup_reclaim_cookie {
> >
> > #ifdef CONFIG_MEMCG
> >
> > +extern int sysctl_recharge_offline_memcgs;
> > +
> > #define MEM_CGROUP_ID_SHIFT 16
> > #define MEM_CGROUP_ID_MAX USHRT_MAX
> >
> > diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> > index 354a2d294f52..1735d1d95652 100644
> > --- a/kernel/sysctl.c
> > +++ b/kernel/sysctl.c
> > @@ -2249,6 +2249,17 @@ static struct ctl_table vm_table[] = {
> > .extra2 = (void *)&mmap_rnd_compat_bits_max,
> > },
> > #endif
> > +#ifdef CONFIG_MEMCG
> > + {
> > + .procname = "recharge_offline_memcgs",
> > + .data = &sysctl_recharge_offline_memcgs,
> > + .maxlen = sizeof(sysctl_recharge_offline_memcgs),
> > + .mode = 0644,
> > + .proc_handler = proc_dointvec_minmax,
> > + .extra1 = SYSCTL_ZERO,
> > + .extra2 = SYSCTL_ONE,
> > + },
> > +#endif /* CONFIG_MEMCG */
> > { }
> > };
>
> Please don't add any more sysctls to kernel/sysctl.c, git log that file
> for a series of cleanups which show how to use your own and why we have
> been doing that cleanup.
Thanks for pointing this out, I definitely missed it. Will do that in
the next version. I guess this will also reduce the reviewer churn if
I won't be touching kernel/sysctl.c?
>
> Luis
On Thu, Jul 20, 2023 at 11:24:20AM -0700, Yosry Ahmed wrote:
> On Thu, Jul 20, 2023 at 11:13 AM Luis Chamberlain <mcgrof@kernel.org> wrote:
> >
> > On Thu, Jul 20, 2023 at 07:08:24AM +0000, Yosry Ahmed wrote:
> > > Add a sysctl to enable/disable memory recharging for offline memcgs. Add
> > > a config option to control whether or not it is enabled by default.
> > >
> > > Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
> > > ---
> > > include/linux/memcontrol.h | 2 ++
> > > kernel/sysctl.c | 11 +++++++++++
> > > mm/Kconfig | 12 ++++++++++++
> > > mm/memcontrol.c | 9 ++++++++-
> > > 4 files changed, 33 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> > > index 59b653d4a76e..ae9f09ee90cb 100644
> > > --- a/include/linux/memcontrol.h
> > > +++ b/include/linux/memcontrol.h
> > > @@ -60,6 +60,8 @@ struct mem_cgroup_reclaim_cookie {
> > >
> > > #ifdef CONFIG_MEMCG
> > >
> > > +extern int sysctl_recharge_offline_memcgs;
> > > +
> > > #define MEM_CGROUP_ID_SHIFT 16
> > > #define MEM_CGROUP_ID_MAX USHRT_MAX
> > >
> > > diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> > > index 354a2d294f52..1735d1d95652 100644
> > > --- a/kernel/sysctl.c
> > > +++ b/kernel/sysctl.c
> > > @@ -2249,6 +2249,17 @@ static struct ctl_table vm_table[] = {
> > > .extra2 = (void *)&mmap_rnd_compat_bits_max,
> > > },
> > > #endif
> > > +#ifdef CONFIG_MEMCG
> > > + {
> > > + .procname = "recharge_offline_memcgs",
> > > + .data = &sysctl_recharge_offline_memcgs,
> > > + .maxlen = sizeof(sysctl_recharge_offline_memcgs),
> > > + .mode = 0644,
> > > + .proc_handler = proc_dointvec_minmax,
> > > + .extra1 = SYSCTL_ZERO,
> > > + .extra2 = SYSCTL_ONE,
> > > + },
> > > +#endif /* CONFIG_MEMCG */
> > > { }
> > > };
> >
> > Please don't add any more sysctls to kernel/sysctl.c, git log that file
> > for a series of cleanups which show how to use your own and why we have
> > been doing that cleanup.
>
> Thanks for pointing this out, I definitely missed it. Will do that in
> the next version. I guess this will also reduce the reviewer churn if
> I won't be touching kernel/sysctl.c?
Right, it means I don't have to care anymore about random sysctl knobs.
Let people knob it all up.
Luis
@@ -60,6 +60,8 @@ struct mem_cgroup_reclaim_cookie {
#ifdef CONFIG_MEMCG
+extern int sysctl_recharge_offline_memcgs;
+
#define MEM_CGROUP_ID_SHIFT 16
#define MEM_CGROUP_ID_MAX USHRT_MAX
@@ -2249,6 +2249,17 @@ static struct ctl_table vm_table[] = {
.extra2 = (void *)&mmap_rnd_compat_bits_max,
},
#endif
+#ifdef CONFIG_MEMCG
+ {
+ .procname = "recharge_offline_memcgs",
+ .data = &sysctl_recharge_offline_memcgs,
+ .maxlen = sizeof(sysctl_recharge_offline_memcgs),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+#endif /* CONFIG_MEMCG */
{ }
};
@@ -1236,6 +1236,18 @@ config LOCK_MM_AND_FIND_VMA
bool
depends on !STACK_GROWSUP
+config MEMCG_RECHARGE_OFFLINE_ENABLED
+ bool "Recharge memory charged to offline memcgs"
+ depends on MEMCG
+ help
+ When a memory cgroup is removed by userspace, try to recharge any
+ memory still charged to it to avoid having it live on as an offline
+ memcg. Offline memcgs potentially consume memory and limit scalability
+ of some operations.
+
+ This option enables the above behavior by default. It can be overridden
+ at runtime through /proc/sys/vm/recharge_offline_memcgs.
+
source "mm/damon/Kconfig"
endmenu
@@ -96,6 +96,9 @@ static bool cgroup_memory_nobpf __ro_after_init;
static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq);
#endif
+int sysctl_recharge_offline_memcgs __read_mostly = IS_ENABLED(
+ CONFIG_MEMCG_RECHARGE_OFFLINE_ENABLED);
+
static struct workqueue_struct *memcg_recharge_wq;
/* Whether legacy memory+swap accounting is active */
@@ -6592,7 +6595,8 @@ static void memcg_recharge_mapped_folios(struct mem_cgroup *memcg)
INIT_DELAYED_WORK(&memcg->recharge_mapped_work.dwork,
memcg_do_recharge_mapped_folios);
- if (memcg_recharge_wq && memcg_nr_local_mapped_pages(memcg)) {
+ if (sysctl_recharge_offline_memcgs &&
+ memcg_recharge_wq && memcg_nr_local_mapped_pages(memcg)) {
memcg->recharge_mapped_work.retries = 0;
queue_delayed_work(memcg_recharge_wq,
&memcg->recharge_mapped_work.dwork, 0);
@@ -6605,6 +6609,9 @@ static bool should_do_deferred_recharge(struct folio *folio)
struct mem_cgroup *memcg;
bool ret;
+ if (!sysctl_recharge_offline_memcgs)
+ return false;
+
rcu_read_lock();
memcg = folio_memcg_rcu(folio);
ret = memcg && !!(memcg->css.flags & CSS_DYING);