[v4,01/18] cgroup/misc: Add per resource callbacks for CSS events

Message ID 20230913040635.28815-2-haitao.huang@linux.intel.com
State New
Headers
Series Add Cgroup support for SGX EPC memory |

Commit Message

Haitao Huang Sept. 13, 2023, 4:06 a.m. UTC
  From: Kristen Carlson Accardi <kristen@linux.intel.com>

Consumers of the misc cgroup controller might need to perform separate
actions for Cgroups Subsystem State(CSS) events: cgroup alloc and free.
In addition, writes to the max value may also need separate action. Add
the ability to allow downstream users to setup callbacks for these
operations, and call the corresponding per-resource-type callback when
appropriate.

This code will be utilized by the SGX driver in a future patch.

Signed-off-by: Kristen Carlson Accardi <kristen@linux.intel.com>
Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com>
---
V4:
- Moved this to the front of the series.
- Applies on cgroup/for-6.6 with the overflow fix for misc.

V3:
- Removed the released() callback
---
 include/linux/misc_cgroup.h |  5 +++++
 kernel/cgroup/misc.c        | 32 +++++++++++++++++++++++++++++---
 2 files changed, 34 insertions(+), 3 deletions(-)
  

Comments

Jarkko Sakkinen Sept. 13, 2023, 9:39 a.m. UTC | #1
On Wed Sep 13, 2023 at 7:06 AM EEST, Haitao Huang wrote:
> From: Kristen Carlson Accardi <kristen@linux.intel.com>
>
> Consumers of the misc cgroup controller might need to perform separate
> actions for Cgroups Subsystem State(CSS) events: cgroup alloc and free.

nit: s/State(CSS)/State (CSS)/

"cgroup alloc" and "cgroup free" mean absolutely nothing.


> In addition, writes to the max value may also need separate action. Add

What "the max value"?

> the ability to allow downstream users to setup callbacks for these
> operations, and call the corresponding per-resource-type callback when
> appropriate.

Who are "the downstream users" and what sort of callbacks they setup?

>
> This code will be utilized by the SGX driver in a future patch.
>
> Signed-off-by: Kristen Carlson Accardi <kristen@linux.intel.com>
> Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com>
> ---
> V4:
> - Moved this to the front of the series.
> - Applies on cgroup/for-6.6 with the overflow fix for misc.
>
> V3:
> - Removed the released() callback
> ---
>  include/linux/misc_cgroup.h |  5 +++++
>  kernel/cgroup/misc.c        | 32 +++++++++++++++++++++++++++++---
>  2 files changed, 34 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h
> index e799b1f8d05b..e1bcd176c2de 100644
> --- a/include/linux/misc_cgroup.h
> +++ b/include/linux/misc_cgroup.h
> @@ -37,6 +37,11 @@ struct misc_res {
>  	u64 max;
>  	atomic64_t usage;
>  	atomic64_t events;
> +
> +	/* per resource callback ops */
> +	int (*misc_cg_alloc)(struct misc_cg *cg);
> +	void (*misc_cg_free)(struct misc_cg *cg);
> +	void (*misc_cg_max_write)(struct misc_cg *cg);
>  };
>  
>  /**
> diff --git a/kernel/cgroup/misc.c b/kernel/cgroup/misc.c
> index 79a3717a5803..e0092170d0dd 100644
> --- a/kernel/cgroup/misc.c
> +++ b/kernel/cgroup/misc.c
> @@ -276,10 +276,13 @@ static ssize_t misc_cg_max_write(struct kernfs_open_file *of, char *buf,
>  
>  	cg = css_misc(of_css(of));
>  
> -	if (READ_ONCE(misc_res_capacity[type]))
> +	if (READ_ONCE(misc_res_capacity[type])) {
>  		WRITE_ONCE(cg->res[type].max, max);
> -	else
> +		if (cg->res[type].misc_cg_max_write)
> +			cg->res[type].misc_cg_max_write(cg);
> +	} else {
>  		ret = -EINVAL;
> +	}
>  
>  	return ret ? ret : nbytes;
>  }
> @@ -383,23 +386,39 @@ static struct cftype misc_cg_files[] = {
>  static struct cgroup_subsys_state *
>  misc_cg_alloc(struct cgroup_subsys_state *parent_css)
>  {
> +	struct misc_cg *parent_cg;
>  	enum misc_res_type i;
>  	struct misc_cg *cg;
> +	int ret;
>  
>  	if (!parent_css) {
>  		cg = &root_cg;
> +		parent_cg = &root_cg;
>  	} else {
>  		cg = kzalloc(sizeof(*cg), GFP_KERNEL);
>  		if (!cg)
>  			return ERR_PTR(-ENOMEM);
> +		parent_cg = css_misc(parent_css);
>  	}
>  
>  	for (i = 0; i < MISC_CG_RES_TYPES; i++) {
>  		WRITE_ONCE(cg->res[i].max, MAX_NUM);
>  		atomic64_set(&cg->res[i].usage, 0);
> +		if (parent_cg->res[i].misc_cg_alloc) {
> +			ret = parent_cg->res[i].misc_cg_alloc(cg);
> +			if (ret)
> +				goto alloc_err;
> +		}
>  	}
>  
>  	return &cg->css;
> +
> +alloc_err:
> +	for (i = 0; i < MISC_CG_RES_TYPES; i++)
> +		if (parent_cg->res[i].misc_cg_free)
> +			cg->res[i].misc_cg_free(cg);
> +	kfree(cg);
> +	return ERR_PTR(ret);
>  }
>  
>  /**
> @@ -410,7 +429,14 @@ misc_cg_alloc(struct cgroup_subsys_state *parent_css)
>   */
>  static void misc_cg_free(struct cgroup_subsys_state *css)
>  {
> -	kfree(css_misc(css));
> +	struct misc_cg *cg = css_misc(css);
> +	enum misc_res_type i;
> +
> +	for (i = 0; i < MISC_CG_RES_TYPES; i++)
> +		if (cg->res[i].misc_cg_free)
> +			cg->res[i].misc_cg_free(cg);
> +
> +	kfree(cg);
>  }
>  
>  /* Cgroup controller callbacks */
> -- 
> 2.25.1

BR, Jarkko
  
Tejun Heo Sept. 15, 2023, 5:55 p.m. UTC | #2
On Tue, Sep 12, 2023 at 09:06:18PM -0700, Haitao Huang wrote:
> @@ -37,6 +37,11 @@ struct misc_res {
>  	u64 max;
>  	atomic64_t usage;
>  	atomic64_t events;
> +
> +	/* per resource callback ops */
> +	int (*misc_cg_alloc)(struct misc_cg *cg);
> +	void (*misc_cg_free)(struct misc_cg *cg);
> +	void (*misc_cg_max_write)(struct misc_cg *cg);

A nit about naming. These are already in misc_res and cgroup_ and cgrp_
prefixes are a lot more common. So, maybe go for sth like cgrp_alloc?

Thanks.
  
Tejun Heo Sept. 15, 2023, 5:58 p.m. UTC | #3
On Fri, Sep 15, 2023 at 07:55:45AM -1000, Tejun Heo wrote:
> On Tue, Sep 12, 2023 at 09:06:18PM -0700, Haitao Huang wrote:
> > @@ -37,6 +37,11 @@ struct misc_res {
> >  	u64 max;
> >  	atomic64_t usage;
> >  	atomic64_t events;
> > +
> > +	/* per resource callback ops */
> > +	int (*misc_cg_alloc)(struct misc_cg *cg);
> > +	void (*misc_cg_free)(struct misc_cg *cg);
> > +	void (*misc_cg_max_write)(struct misc_cg *cg);
> 
> A nit about naming. These are already in misc_res and cgroup_ and cgrp_
> prefixes are a lot more common. So, maybe go for sth like cgrp_alloc?

Ah, never mind about the prefix part. misc is using cg_ prefix widely
already.

Thanks.
  
Haitao Huang Sept. 16, 2023, 1:27 a.m. UTC | #4
On Fri, 15 Sep 2023 12:58:11 -0500, Tejun Heo <tj@kernel.org> wrote:

> On Fri, Sep 15, 2023 at 07:55:45AM -1000, Tejun Heo wrote:
>> On Tue, Sep 12, 2023 at 09:06:18PM -0700, Haitao Huang wrote:
>> > @@ -37,6 +37,11 @@ struct misc_res {
>> >  	u64 max;
>> >  	atomic64_t usage;
>> >  	atomic64_t events;
>> > +
>> > +	/* per resource callback ops */
>> > +	int (*misc_cg_alloc)(struct misc_cg *cg);
>> > +	void (*misc_cg_free)(struct misc_cg *cg);
>> > +	void (*misc_cg_max_write)(struct misc_cg *cg);
>>
>> A nit about naming. These are already in misc_res and cgroup_ and cgrp_
>> prefixes are a lot more common. So, maybe go for sth like cgrp_alloc?
>
> Ah, never mind about the prefix part. misc is using cg_ prefix widely
> already.
>


How about changing them to plain alloc, free, and max_write, since they
are per resource type, not per cgroup?
That would also follow the no-prefix naming scheme used for ops tables,
e.g., "open" in fops, vma_ops, etc.

Thanks for your review.

Haitao
  
Haitao Huang Sept. 16, 2023, 4:11 a.m. UTC | #5
Hi Jarkko

On Wed, 13 Sep 2023 04:39:06 -0500, Jarkko Sakkinen <jarkko@kernel.org>  
wrote:

> On Wed Sep 13, 2023 at 7:06 AM EEST, Haitao Huang wrote:
>> From: Kristen Carlson Accardi <kristen@linux.intel.com>
>>
>> Consumers of the misc cgroup controller might need to perform separate
>> actions for Cgroups Subsystem State(CSS) events: cgroup alloc and free.
>
> nit: s/State(CSS)/State (CSS)/
>
> "cgroup alloc" and "cgroup free" mean absolutely nothing.
>
>
>> In addition, writes to the max value may also need separate action. Add
>
> What "the max value"?
>
>> the ability to allow downstream users to setup callbacks for these
>> operations, and call the corresponding per-resource-type callback when
>> appropriate.
>
> Who are "the downstream users" and what sort of callbacks they setup?

How about this?

The misc cgroup controller (subsystem) currently does not perform any
resource-type-specific action for Cgroups Subsystem State (CSS) events
(the 'css_alloc' event when a cgroup is created and the 'css_free' event
when a cgroup is destroyed), or when a user writes the max value to the
misc.max file to set the consumption limit of a specific resource
[admin-guide/cgroup-v2.rst, 5-9. Misc].

Define callbacks for those events and allow resource providers to register
the callbacks per resource type as needed. This will be utilized later by
the EPC misc cgroup support implemented in the SGX driver:
- On cgroup alloc, allocate and initialize the structures necessary for
EPC reclaiming, e.g., LRU list, work queue, etc.
- On cgroup free, clean up and free the structures created in alloc.
- On max write, trigger EPC reclaiming if the new limit is at or below
the current consumption.

Thanks
Haitao
  
Jarkko Sakkinen Sept. 25, 2023, 4:57 p.m. UTC | #6
On Sat Sep 16, 2023 at 7:11 AM EEST, Haitao Huang wrote:
> Hi Jarkko
>
> On Wed, 13 Sep 2023 04:39:06 -0500, Jarkko Sakkinen <jarkko@kernel.org>  
> wrote:
>
> > On Wed Sep 13, 2023 at 7:06 AM EEST, Haitao Huang wrote:
> >> From: Kristen Carlson Accardi <kristen@linux.intel.com>
> >>
> >> Consumers of the misc cgroup controller might need to perform separate
> >> actions for Cgroups Subsystem State(CSS) events: cgroup alloc and free.
> >
> > nit: s/State(CSS)/State (CSS)/
> >
> > "cgroup alloc" and "cgroup free" mean absolutely nothing.
> >
> >
> >> In addition, writes to the max value may also need separate action. Add
> >
> > What "the max value"?
> >
> >> the ability to allow downstream users to setup callbacks for these
> >> operations, and call the corresponding per-resource-type callback when
> >> appropriate.
> >
> > Who are "the downstream users" and what sort of callbacks they setup?
>
> How about this?
>
> The misc cgroup controller (subsystem) currently does not perform any
> resource-type-specific action for Cgroups Subsystem State (CSS) events
> (the 'css_alloc' event when a cgroup is created and the 'css_free' event
> when a cgroup is destroyed), or when a user writes the max value to the
> misc.max file to set the consumption limit of a specific resource
> [admin-guide/cgroup-v2.rst, 5-9. Misc].
>
> Define callbacks for those events and allow resource providers to register
> the callbacks per resource type as needed. This will be utilized later by
> the EPC misc cgroup support implemented in the SGX driver:
> - On cgroup alloc, allocate and initialize the structures necessary for
> EPC reclaiming, e.g., LRU list, work queue, etc.
> - On cgroup free, clean up and free the structures created in alloc.
> - On max write, trigger EPC reclaiming if the new limit is at or below
> the current consumption.

Yeah, this is much better (I was on holiday, hence the delay in my
response).

> Thanks
> Haitao

BR, Jarkko
  

Patch

diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h
index e799b1f8d05b..e1bcd176c2de 100644
--- a/include/linux/misc_cgroup.h
+++ b/include/linux/misc_cgroup.h
@@ -37,6 +37,11 @@  struct misc_res {
 	u64 max;
 	atomic64_t usage;
 	atomic64_t events;
+
+	/* per resource callback ops */
+	int (*misc_cg_alloc)(struct misc_cg *cg);
+	void (*misc_cg_free)(struct misc_cg *cg);
+	void (*misc_cg_max_write)(struct misc_cg *cg);
 };
 
 /**
diff --git a/kernel/cgroup/misc.c b/kernel/cgroup/misc.c
index 79a3717a5803..e0092170d0dd 100644
--- a/kernel/cgroup/misc.c
+++ b/kernel/cgroup/misc.c
@@ -276,10 +276,13 @@  static ssize_t misc_cg_max_write(struct kernfs_open_file *of, char *buf,
 
 	cg = css_misc(of_css(of));
 
-	if (READ_ONCE(misc_res_capacity[type]))
+	if (READ_ONCE(misc_res_capacity[type])) {
 		WRITE_ONCE(cg->res[type].max, max);
-	else
+		if (cg->res[type].misc_cg_max_write)
+			cg->res[type].misc_cg_max_write(cg);
+	} else {
 		ret = -EINVAL;
+	}
 
 	return ret ? ret : nbytes;
 }
@@ -383,23 +386,39 @@  static struct cftype misc_cg_files[] = {
 static struct cgroup_subsys_state *
 misc_cg_alloc(struct cgroup_subsys_state *parent_css)
 {
+	struct misc_cg *parent_cg;
 	enum misc_res_type i;
 	struct misc_cg *cg;
+	int ret;
 
 	if (!parent_css) {
 		cg = &root_cg;
+		parent_cg = &root_cg;
 	} else {
 		cg = kzalloc(sizeof(*cg), GFP_KERNEL);
 		if (!cg)
 			return ERR_PTR(-ENOMEM);
+		parent_cg = css_misc(parent_css);
 	}
 
 	for (i = 0; i < MISC_CG_RES_TYPES; i++) {
 		WRITE_ONCE(cg->res[i].max, MAX_NUM);
 		atomic64_set(&cg->res[i].usage, 0);
+		if (parent_cg->res[i].misc_cg_alloc) {
+			ret = parent_cg->res[i].misc_cg_alloc(cg);
+			if (ret)
+				goto alloc_err;
+		}
 	}
 
 	return &cg->css;
+
+alloc_err:
+	for (i = 0; i < MISC_CG_RES_TYPES; i++)
+		if (parent_cg->res[i].misc_cg_free)
+			cg->res[i].misc_cg_free(cg);
+	kfree(cg);
+	return ERR_PTR(ret);
 }
 
 /**
@@ -410,7 +429,14 @@  misc_cg_alloc(struct cgroup_subsys_state *parent_css)
  */
 static void misc_cg_free(struct cgroup_subsys_state *css)
 {
-	kfree(css_misc(css));
+	struct misc_cg *cg = css_misc(css);
+	enum misc_res_type i;
+
+	for (i = 0; i < MISC_CG_RES_TYPES; i++)
+		if (cg->res[i].misc_cg_free)
+			cg->res[i].misc_cg_free(cg);
+
+	kfree(cg);
 }
 
 /* Cgroup controller callbacks */