[v5,7/8] x86/resctrl: Move default control group creation during mount

Message ID 168564612103.527584.4866621411469438225.stgit@bmoger-ubuntu
State New
Headers
Series x86/resctrl: Miscellaneous resctrl features |

Commit Message

Moger, Babu June 1, 2023, 7:02 p.m. UTC
  Currently, the resctrl default control group is created during kernel
init time and rest of the files are added during mount. If the new
files are to be added to the default group during the mount then it
has to be done separately again.

This can avoided if all the files are created during the mount and
destroyed during the umount. Move the default group creation in
rdt_get_tree and removal in rdt_kill_sb.

Suggested-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Babu Moger <babu.moger@amd.com>
---
 arch/x86/kernel/cpu/resctrl/rdtgroup.c |   59 ++++++++++++++++----------------
 1 file changed, 30 insertions(+), 29 deletions(-)
  

Comments

Reinette Chatre July 7, 2023, 9:46 p.m. UTC | #1
Hi Babu,

On 6/1/2023 12:02 PM, Babu Moger wrote:
> Currently, the resctrl default control group is created during kernel
> init time and rest of the files are added during mount. If the new

Please drop the word "Currently"

> files are to be added to the default group during the mount then it
> has to be done separately again.
> 
> This can avoided if all the files are created during the mount and
> destroyed during the umount. Move the default group creation in

"creation in" -> "creation to"?

> rdt_get_tree and removal in rdt_kill_sb.

I think it would be simpler if this patch is moved earlier in series
then patch 8 can more easily be squashed where appropriate.

> 
> Suggested-by: Reinette Chatre <reinette.chatre@intel.com>
> Signed-off-by: Babu Moger <babu.moger@amd.com>
> ---
>  arch/x86/kernel/cpu/resctrl/rdtgroup.c |   59 ++++++++++++++++----------------
>  1 file changed, 30 insertions(+), 29 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> index 2f5cdc638607..e03cb01c4742 100644
> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> @@ -57,6 +57,7 @@ static char last_cmd_status_buf[512];
>  struct dentry *debugfs_resctrl;
>  
>  static bool resctrl_debug;
> +static int rdtgroup_setup_root(void);
>  
>  void rdt_last_cmd_clear(void)
>  {
> @@ -2515,13 +2516,6 @@ static int rdt_get_tree(struct fs_context *fc)
>  
>  	cpus_read_lock();
>  	mutex_lock(&rdtgroup_mutex);
> -	/*
> -	 * resctrl file system can only be mounted once.
> -	 */
> -	if (static_branch_unlikely(&rdt_enable_key)) {
> -		ret = -EBUSY;
> -		goto out;
> -	}
>  

This change is unexpected.

>  	ret = rdt_enable_ctx(ctx);
>  	if (ret < 0)
> @@ -2535,9 +2529,15 @@ static int rdt_get_tree(struct fs_context *fc)
>  
>  	closid_init();
>  
> +	ret = rdtgroup_add_files(rdtgroup_default.kn, RFTYPE_CTRL_BASE);
> +	if (ret)
> +		goto out_schemata_free;
> +
> +	kernfs_activate(rdtgroup_default.kn);
> +
>  	ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
>  	if (ret < 0)
> -		goto out_schemata_free;
> +		goto out_default;
>  
>  	if (rdt_mon_capable) {
>  		ret = mongroup_create_dir(rdtgroup_default.kn,
> @@ -2587,6 +2587,8 @@ static int rdt_get_tree(struct fs_context *fc)
>  		kernfs_remove(kn_mongrp);
>  out_info:
>  	kernfs_remove(kn_info);
> +out_default:
> +	kernfs_remove(rdtgroup_default.kn);
>  out_schemata_free:
>  	schemata_list_destroy();
>  out_mba:
> @@ -2664,10 +2666,23 @@ static const struct fs_context_operations rdt_fs_context_ops = {
>  static int rdt_init_fs_context(struct fs_context *fc)
>  {
>  	struct rdt_fs_context *ctx;
> +	int ret;
> +
> +	/*
> +	 * resctrl file system can only be mounted once.
> +	 */
> +	if (static_branch_unlikely(&rdt_enable_key))
> +		return -EBUSY;
> +
> +	ret = rdtgroup_setup_root();
> +	if (ret)
> +		return ret;
>  

Why was it necessary to move this code?

>  	ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
> -	if (!ctx)
> +	if (!ctx) {
> +		kernfs_destroy_root(rdt_root);
>  		return -ENOMEM;
> +	}
>  
>  	ctx->kfc.root = rdt_root;
>  	ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
> @@ -2845,6 +2860,9 @@ static void rdt_kill_sb(struct super_block *sb)
>  	static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
>  	static_branch_disable_cpuslocked(&rdt_mon_enable_key);
>  	static_branch_disable_cpuslocked(&rdt_enable_key);
> +	/* Remove the default group and cleanup the root */
> +	list_del(&rdtgroup_default.rdtgroup_list);
> +	kernfs_destroy_root(rdt_root);

Why not just add kernfs_remove(rdtgroup_default.kn) to rmdir_all_sub()?

>  	kernfs_kill_sb(sb);
>  	mutex_unlock(&rdtgroup_mutex);
>  	cpus_read_unlock();
> @@ -3598,10 +3616,8 @@ static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
>  	.show_options	= rdtgroup_show_options,
>  };
>  
> -static int __init rdtgroup_setup_root(void)
> +static int rdtgroup_setup_root(void)
>  {
> -	int ret;
> -
>  	rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
>  				      KERNFS_ROOT_CREATE_DEACTIVATED |
>  				      KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
> @@ -3618,19 +3634,11 @@ static int __init rdtgroup_setup_root(void)
>  
>  	list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
>  
> -	ret = rdtgroup_add_files(kernfs_root_to_node(rdt_root), RFTYPE_CTRL_BASE);
> -	if (ret) {
> -		kernfs_destroy_root(rdt_root);
> -		goto out;
> -	}
> -
>  	rdtgroup_default.kn = kernfs_root_to_node(rdt_root);
> -	kernfs_activate(rdtgroup_default.kn);
>  
> -out:
>  	mutex_unlock(&rdtgroup_mutex);
>  
> -	return ret;
> +	return 0;
>  }
>  
>  static void domain_destroy_mon_state(struct rdt_domain *d)
> @@ -3752,13 +3760,9 @@ int __init rdtgroup_init(void)
>  	seq_buf_init(&last_cmd_status, last_cmd_status_buf,
>  		     sizeof(last_cmd_status_buf));
>  
> -	ret = rdtgroup_setup_root();
> -	if (ret)
> -		return ret;
> -
>  	ret = sysfs_create_mount_point(fs_kobj, "resctrl");
>  	if (ret)
> -		goto cleanup_root;
> +		return ret;
>  

It is not clear to me why this change is required, could you
please elaborate? It seems that all that is needed is for 
rdtgroup_add_files() to move to rdt_get_tree() (which you have done)
and then an additional call to kernfs_remove() in rmdir_all_sub().
I must be missing something, could you please help me understand?

>  	ret = register_filesystem(&rdt_fs_type);
>  	if (ret)
> @@ -3791,8 +3795,6 @@ int __init rdtgroup_init(void)
>  
>  cleanup_mountpoint:
>  	sysfs_remove_mount_point(fs_kobj, "resctrl");
> -cleanup_root:
> -	kernfs_destroy_root(rdt_root);
>  
>  	return ret;
>  }
> @@ -3802,5 +3804,4 @@ void __exit rdtgroup_exit(void)
>  	debugfs_remove_recursive(debugfs_resctrl);
>  	unregister_filesystem(&rdt_fs_type);
>  	sysfs_remove_mount_point(fs_kobj, "resctrl");
> -	kernfs_destroy_root(rdt_root);
>  }
> 
> 

Reinette
  
Moger, Babu July 14, 2023, 4:26 p.m. UTC | #2
Hi Reinette,
Sorry.. Took a while to respond. I had to recreate the issue to refresh my
memory.

On 7/7/23 16:46, Reinette Chatre wrote:
> Hi Babu,
> 
> On 6/1/2023 12:02 PM, Babu Moger wrote:
>> Currently, the resctrl default control group is created during kernel
>> init time and rest of the files are added during mount. If the new
> 
> Please drop the word "Currently"

Sure
> 
>> files are to be added to the default group during the mount then it
>> has to be done separately again.
>>
>> This can avoided if all the files are created during the mount and
>> destroyed during the umount. Move the default group creation in
> 
> "creation in" -> "creation to"?

Sure
> 
>> rdt_get_tree and removal in rdt_kill_sb.
> 
> I think it would be simpler if this patch is moved earlier in series
> then patch 8 can more easily be squashed where appropriate.

Yes, I was thinking about that.
> 
>>
>> Suggested-by: Reinette Chatre <reinette.chatre@intel.com>
>> Signed-off-by: Babu Moger <babu.moger@amd.com>
>> ---
>>  arch/x86/kernel/cpu/resctrl/rdtgroup.c |   59 ++++++++++++++++----------------
>>  1 file changed, 30 insertions(+), 29 deletions(-)
>>
>> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>> index 2f5cdc638607..e03cb01c4742 100644
>> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>> @@ -57,6 +57,7 @@ static char last_cmd_status_buf[512];
>>  struct dentry *debugfs_resctrl;
>>  
>>  static bool resctrl_debug;
>> +static int rdtgroup_setup_root(void);
>>  
>>  void rdt_last_cmd_clear(void)
>>  {
>> @@ -2515,13 +2516,6 @@ static int rdt_get_tree(struct fs_context *fc)
>>  
>>  	cpus_read_lock();
>>  	mutex_lock(&rdtgroup_mutex);
>> -	/*
>> -	 * resctrl file system can only be mounted once.
>> -	 */
>> -	if (static_branch_unlikely(&rdt_enable_key)) {
>> -		ret = -EBUSY;
>> -		goto out;
>> -	}
>>  
> 
> This change is unexpected.

Please see my comments below.

> 
>>  	ret = rdt_enable_ctx(ctx);
>>  	if (ret < 0)
>> @@ -2535,9 +2529,15 @@ static int rdt_get_tree(struct fs_context *fc)
>>  
>>  	closid_init();
>>  
>> +	ret = rdtgroup_add_files(rdtgroup_default.kn, RFTYPE_CTRL_BASE);
>> +	if (ret)
>> +		goto out_schemata_free;
>> +
>> +	kernfs_activate(rdtgroup_default.kn);
>> +
>>  	ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
>>  	if (ret < 0)
>> -		goto out_schemata_free;
>> +		goto out_default;
>>  
>>  	if (rdt_mon_capable) {
>>  		ret = mongroup_create_dir(rdtgroup_default.kn,
>> @@ -2587,6 +2587,8 @@ static int rdt_get_tree(struct fs_context *fc)
>>  		kernfs_remove(kn_mongrp);
>>  out_info:
>>  	kernfs_remove(kn_info);
>> +out_default:
>> +	kernfs_remove(rdtgroup_default.kn);
>>  out_schemata_free:
>>  	schemata_list_destroy();
>>  out_mba:
>> @@ -2664,10 +2666,23 @@ static const struct fs_context_operations rdt_fs_context_ops = {
>>  static int rdt_init_fs_context(struct fs_context *fc)
>>  {
>>  	struct rdt_fs_context *ctx;
>> +	int ret;
>> +
>> +	/*
>> +	 * resctrl file system can only be mounted once.
>> +	 */
>> +	if (static_branch_unlikely(&rdt_enable_key))
>> +		return -EBUSY;
>> +
>> +	ret = rdtgroup_setup_root();
>> +	if (ret)
>> +		return ret;
>>  
> 
> Why was it necessary to move this code?

Please see my comments below..
> 
>>  	ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
>> -	if (!ctx)
>> +	if (!ctx) {
>> +		kernfs_destroy_root(rdt_root);
>>  		return -ENOMEM;
>> +	}
>>  
>>  	ctx->kfc.root = rdt_root;
>>  	ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
>> @@ -2845,6 +2860,9 @@ static void rdt_kill_sb(struct super_block *sb)
>>  	static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
>>  	static_branch_disable_cpuslocked(&rdt_mon_enable_key);
>>  	static_branch_disable_cpuslocked(&rdt_enable_key);
>> +	/* Remove the default group and cleanup the root */
>> +	list_del(&rdtgroup_default.rdtgroup_list);
>> +	kernfs_destroy_root(rdt_root);
> 
> Why not just add kernfs_remove(rdtgroup_default.kn) to rmdir_all_sub()?

List rdtgroup_default.rdtgroup_list is added during the mount and had to
be removed during umount and rdt_root is destroyed here.
Please see more comments below.

> 
>>  	kernfs_kill_sb(sb);
>>  	mutex_unlock(&rdtgroup_mutex);
>>  	cpus_read_unlock();
>> @@ -3598,10 +3616,8 @@ static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
>>  	.show_options	= rdtgroup_show_options,
>>  };
>>  
>> -static int __init rdtgroup_setup_root(void)
>> +static int rdtgroup_setup_root(void)
>>  {
>> -	int ret;
>> -
>>  	rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
>>  				      KERNFS_ROOT_CREATE_DEACTIVATED |
>>  				      KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
>> @@ -3618,19 +3634,11 @@ static int __init rdtgroup_setup_root(void)
>>  
>>  	list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
>>  
>> -	ret = rdtgroup_add_files(kernfs_root_to_node(rdt_root), RFTYPE_CTRL_BASE);
>> -	if (ret) {
>> -		kernfs_destroy_root(rdt_root);
>> -		goto out;
>> -	}
>> -
>>  	rdtgroup_default.kn = kernfs_root_to_node(rdt_root);
>> -	kernfs_activate(rdtgroup_default.kn);
>>  
>> -out:
>>  	mutex_unlock(&rdtgroup_mutex);
>>  
>> -	return ret;
>> +	return 0;
>>  }
>>  
>>  static void domain_destroy_mon_state(struct rdt_domain *d)
>> @@ -3752,13 +3760,9 @@ int __init rdtgroup_init(void)
>>  	seq_buf_init(&last_cmd_status, last_cmd_status_buf,
>>  		     sizeof(last_cmd_status_buf));
>>  
>> -	ret = rdtgroup_setup_root();
>> -	if (ret)
>> -		return ret;
>> -
>>  	ret = sysfs_create_mount_point(fs_kobj, "resctrl");
>>  	if (ret)
>> -		goto cleanup_root;
>> +		return ret;
>>  
> 
> It is not clear to me why this change is required, could you
> please elaborate? It seems that all that is needed is for 
> rdtgroup_add_files() to move to rdt_get_tree() (which you have done)
> and then an additional call to kernfs_remove() in rmdir_all_sub().
> I must be missing something, could you please help me understand?
> 

Yes. I started with that approach. But there are issues with that approach.

Currently, rdt_root(which is rdtgroup_default.kn) is created during
rdtgroup_init. At the same time the root files are created. Also, default
group is added to rdt_all_groups. Basically, the root files and
rdtgroup_default group is always there even though filesystem is never
mounted. Also mbm_over and cqm_limbo workqueues are always running even
though filesystem is not mounted.

I changed rdtgroup_add_files() to move to rdt_get_tree() and added
kernfs_remove() in rmdir_all_sub(). This caused problems. The
kernfs_remove(rdtgroup_default.kn) removes all the reference counts and
releases the root. When we mount again, we hit this this problem below.

[  404.558461] ------------[ cut here ]------------
[  404.563631] WARNING: CPU: 35 PID: 7728 at fs/kernfs/dir.c:522
kernfs_new_node+0x63/0x70

404.778793]  ? __warn+0x81/0x140
[  404.782535]  ? kernfs_new_node+0x63/0x70
[  404.787036]  ? report_bug+0x102/0x200
[  404.791247]  ? handle_bug+0x3f/0x70
[  404.795269]  ? exc_invalid_op+0x13/0x60
[  404.799671]  ? asm_exc_invalid_op+0x16/0x20
[  404.804461]  ? kernfs_new_node+0x63/0x70
[  404.808954]  ? snprintf+0x49/0x70
[  404.812762]  __kernfs_create_file+0x30/0xc0
[  404.817534]  rdtgroup_add_files+0x6c/0x100

Basically kernel says your rdt_root is not initialized. That is the reason
I had to move everything to mount time. The rdt_root is created and
initialized during the mount and also destroyed during the umount.
And I had to move rdt_enable_key check during rdt_root creation.
  
Reinette Chatre July 14, 2023, 9:54 p.m. UTC | #3
Hi Babu,

On 7/14/2023 9:26 AM, Moger, Babu wrote:
> Hi Reinette,
> Sorry.. Took a while to respond. I had to recreate the issue to refresh my
> memory.

No problem!

> On 7/7/23 16:46, Reinette Chatre wrote:
>> Hi Babu,
>>
>> On 6/1/2023 12:02 PM, Babu Moger wrote:


>>>  	ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
>>> -	if (!ctx)
>>> +	if (!ctx) {
>>> +		kernfs_destroy_root(rdt_root);
>>>  		return -ENOMEM;
>>> +	}
>>>  
>>>  	ctx->kfc.root = rdt_root;
>>>  	ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
>>> @@ -2845,6 +2860,9 @@ static void rdt_kill_sb(struct super_block *sb)
>>>  	static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
>>>  	static_branch_disable_cpuslocked(&rdt_mon_enable_key);
>>>  	static_branch_disable_cpuslocked(&rdt_enable_key);
>>> +	/* Remove the default group and cleanup the root */
>>> +	list_del(&rdtgroup_default.rdtgroup_list);
>>> +	kernfs_destroy_root(rdt_root);
>>
>> Why not just add kernfs_remove(rdtgroup_default.kn) to rmdir_all_sub()?
> 
> List rdtgroup_default.rdtgroup_list is added during the mount and had to
> be removed during umount and rdt_root is destroyed here.

I do not think it is required for default resource group management to
be tied with the resctrl files associated with default resource group.

I think rdtgroup_setup_root can be split in two, one for all the
resctrl files that should be done at mount/unmount and one for the
default group init done at __init.

>>>  	kernfs_kill_sb(sb);
>>>  	mutex_unlock(&rdtgroup_mutex);
>>>  	cpus_read_unlock();
>>> @@ -3598,10 +3616,8 @@ static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
>>>  	.show_options	= rdtgroup_show_options,
>>>  };
>>>  
>>> -static int __init rdtgroup_setup_root(void)
>>> +static int rdtgroup_setup_root(void)
>>>  {
>>> -	int ret;
>>> -
>>>  	rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
>>>  				      KERNFS_ROOT_CREATE_DEACTIVATED |
>>>  				      KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
>>> @@ -3618,19 +3634,11 @@ static int __init rdtgroup_setup_root(void)
>>>  
>>>  	list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
>>>  
>>> -	ret = rdtgroup_add_files(kernfs_root_to_node(rdt_root), RFTYPE_CTRL_BASE);
>>> -	if (ret) {
>>> -		kernfs_destroy_root(rdt_root);
>>> -		goto out;
>>> -	}
>>> -
>>>  	rdtgroup_default.kn = kernfs_root_to_node(rdt_root);
>>> -	kernfs_activate(rdtgroup_default.kn);
>>>  
>>> -out:
>>>  	mutex_unlock(&rdtgroup_mutex);
>>>  
>>> -	return ret;
>>> +	return 0;
>>>  }
>>>  
>>>  static void domain_destroy_mon_state(struct rdt_domain *d)
>>> @@ -3752,13 +3760,9 @@ int __init rdtgroup_init(void)
>>>  	seq_buf_init(&last_cmd_status, last_cmd_status_buf,
>>>  		     sizeof(last_cmd_status_buf));
>>>  
>>> -	ret = rdtgroup_setup_root();
>>> -	if (ret)
>>> -		return ret;
>>> -
>>>  	ret = sysfs_create_mount_point(fs_kobj, "resctrl");
>>>  	if (ret)
>>> -		goto cleanup_root;
>>> +		return ret;
>>>  
>>
>> It is not clear to me why this change is required, could you
>> please elaborate? It seems that all that is needed is for 
>> rdtgroup_add_files() to move to rdt_get_tree() (which you have done)
>> and then an additional call to kernfs_remove() in rmdir_all_sub().
>> I must be missing something, could you please help me understand?
>>
> 
> Yes. I started with that approach. But there are issues with that approach.
> 
> Currently, rdt_root(which is rdtgroup_default.kn) is created during
> rdtgroup_init. At the same time the root files are created. Also, default
> group is added to rdt_all_groups. Basically, the root files and
> rdtgroup_default group is always there even though filesystem is never
> mounted. Also mbm_over and cqm_limbo workqueues are always running even
> though filesystem is not mounted.
> 
> I changed rdtgroup_add_files() to move to rdt_get_tree() and added
> kernfs_remove() in rmdir_all_sub(). This caused problems. The
> kernfs_remove(rdtgroup_default.kn) removes all the reference counts and
> releases the root. When we mount again, we hit this this problem below.
> 
> [  404.558461] ------------[ cut here ]------------
> [  404.563631] WARNING: CPU: 35 PID: 7728 at fs/kernfs/dir.c:522
> kernfs_new_node+0x63/0x70
> 
> 404.778793]  ? __warn+0x81/0x140
> [  404.782535]  ? kernfs_new_node+0x63/0x70
> [  404.787036]  ? report_bug+0x102/0x200
> [  404.791247]  ? handle_bug+0x3f/0x70
> [  404.795269]  ? exc_invalid_op+0x13/0x60
> [  404.799671]  ? asm_exc_invalid_op+0x16/0x20
> [  404.804461]  ? kernfs_new_node+0x63/0x70
> [  404.808954]  ? snprintf+0x49/0x70
> [  404.812762]  __kernfs_create_file+0x30/0xc0
> [  404.817534]  rdtgroup_add_files+0x6c/0x100
> 
> Basically kernel says your rdt_root is not initialized. That is the reason
> I had to move everything to mount time. The rdt_root is created and
> initialized during the mount and also destroyed during the umount.
> And I had to move rdt_enable_key check during rdt_root creation.
> 

ok, thank you for the additional details. I see now how this patch evolved.
I understand how rdt_root needs to be created/destroyed
during mount/unmount. If I understand correctly the changes to
rdt_init_fs_context() was motivated by this line:

	ctx->kfc.root = rdt_root;

... that prompted you to move rdt_root creation there in order to have
it present for this assignment and that prompted the
rdt_enable_key check to follow. Is this correct?

I am concerned about the changes to rdt_init_fs_context() since it further
separates the resctrl file management, it breaks the symmetry of the
key checked and set, and finally these new actions seem unrelated to a function
named "init_fs_context". I looked at other examples and from what I can tell
it is not required that ctx->kfc.root be initialized within
rdt_init_fs_context(). Looks like the value is required by kernfs_get_tree()
that is called from rdt_get_tree(). For comparison I found cgroup_do_get_tree().
Note how cgroup_do_get_tree(), within the .get_tree callback,
initializes kernfs_fs_context.root and then call kernfs_get_tree()? 

It thus looks to me as though things can be simplified significantly
if the kernfs_fs_context.root assignment is moved from rdt_init_fs_context()
to rdt_get_tree(). rdt_get_tree() can then create rdt_root (and add all needed
files), assign it to kernfs_fs_context.root and call kernfs_get_tree().

What do you think?

Reinette
  
Moger, Babu July 14, 2023, 10:42 p.m. UTC | #4
Hi Reinette,

On 7/14/23 16:54, Reinette Chatre wrote:
> Hi Babu,
> 
> On 7/14/2023 9:26 AM, Moger, Babu wrote:
>> Hi Reinette,
>> Sorry.. Took a while to respond. I had to recreate the issue to refresh my
>> memory.
> 
> No problem!
> 
>> On 7/7/23 16:46, Reinette Chatre wrote:
>>> Hi Babu,
>>>
>>> On 6/1/2023 12:02 PM, Babu Moger wrote:
> 
> 
>>>>  	ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
>>>> -	if (!ctx)
>>>> +	if (!ctx) {
>>>> +		kernfs_destroy_root(rdt_root);
>>>>  		return -ENOMEM;
>>>> +	}
>>>>  
>>>>  	ctx->kfc.root = rdt_root;
>>>>  	ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
>>>> @@ -2845,6 +2860,9 @@ static void rdt_kill_sb(struct super_block *sb)
>>>>  	static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
>>>>  	static_branch_disable_cpuslocked(&rdt_mon_enable_key);
>>>>  	static_branch_disable_cpuslocked(&rdt_enable_key);
>>>> +	/* Remove the default group and cleanup the root */
>>>> +	list_del(&rdtgroup_default.rdtgroup_list);
>>>> +	kernfs_destroy_root(rdt_root);
>>>
>>> Why not just add kernfs_remove(rdtgroup_default.kn) to rmdir_all_sub()?
>>
>> List rdtgroup_default.rdtgroup_list is added during the mount and had to
>> be removed during umount and rdt_root is destroyed here.
> 
> I do not think it is required for default resource group management to
> be tied with the resctrl files associated with default resource group.
> 
> I think rdtgroup_setup_root can be split in two, one for all the
> resctrl files that should be done at mount/unmount and one for the
> default group init done at __init.

Ok.
> 
>>>>  	kernfs_kill_sb(sb);
>>>>  	mutex_unlock(&rdtgroup_mutex);
>>>>  	cpus_read_unlock();
>>>> @@ -3598,10 +3616,8 @@ static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
>>>>  	.show_options	= rdtgroup_show_options,
>>>>  };
>>>>  
>>>> -static int __init rdtgroup_setup_root(void)
>>>> +static int rdtgroup_setup_root(void)
>>>>  {
>>>> -	int ret;
>>>> -
>>>>  	rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
>>>>  				      KERNFS_ROOT_CREATE_DEACTIVATED |
>>>>  				      KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
>>>> @@ -3618,19 +3634,11 @@ static int __init rdtgroup_setup_root(void)
>>>>  
>>>>  	list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
>>>>  
>>>> -	ret = rdtgroup_add_files(kernfs_root_to_node(rdt_root), RFTYPE_CTRL_BASE);
>>>> -	if (ret) {
>>>> -		kernfs_destroy_root(rdt_root);
>>>> -		goto out;
>>>> -	}
>>>> -
>>>>  	rdtgroup_default.kn = kernfs_root_to_node(rdt_root);
>>>> -	kernfs_activate(rdtgroup_default.kn);
>>>>  
>>>> -out:
>>>>  	mutex_unlock(&rdtgroup_mutex);
>>>>  
>>>> -	return ret;
>>>> +	return 0;
>>>>  }
>>>>  
>>>>  static void domain_destroy_mon_state(struct rdt_domain *d)
>>>> @@ -3752,13 +3760,9 @@ int __init rdtgroup_init(void)
>>>>  	seq_buf_init(&last_cmd_status, last_cmd_status_buf,
>>>>  		     sizeof(last_cmd_status_buf));
>>>>  
>>>> -	ret = rdtgroup_setup_root();
>>>> -	if (ret)
>>>> -		return ret;
>>>> -
>>>>  	ret = sysfs_create_mount_point(fs_kobj, "resctrl");
>>>>  	if (ret)
>>>> -		goto cleanup_root;
>>>> +		return ret;
>>>>  
>>>
>>> It is not clear to me why this change is required, could you
>>> please elaborate? It seems that all that is needed is for 
>>> rdtgroup_add_files() to move to rdt_get_tree() (which you have done)
>>> and then an additional call to kernfs_remove() in rmdir_all_sub().
>>> I must be missing something, could you please help me understand?
>>>
>>
>> Yes. I started with that approach. But there are issues with that approach.
>>
>> Currently, rdt_root(which is rdtgroup_default.kn) is created during
>> rdtgroup_init. At the same time the root files are created. Also, default
>> group is added to rdt_all_groups. Basically, the root files and
>> rdtgroup_default group is always there even though filesystem is never
>> mounted. Also mbm_over and cqm_limbo workqueues are always running even
>> though filesystem is not mounted.
>>
>> I changed rdtgroup_add_files() to move to rdt_get_tree() and added
>> kernfs_remove() in rmdir_all_sub(). This caused problems. The
>> kernfs_remove(rdtgroup_default.kn) removes all the reference counts and
>> releases the root. When we mount again, we hit this this problem below.
>>
>> [  404.558461] ------------[ cut here ]------------
>> [  404.563631] WARNING: CPU: 35 PID: 7728 at fs/kernfs/dir.c:522
>> kernfs_new_node+0x63/0x70
>>
>> 404.778793]  ? __warn+0x81/0x140
>> [  404.782535]  ? kernfs_new_node+0x63/0x70
>> [  404.787036]  ? report_bug+0x102/0x200
>> [  404.791247]  ? handle_bug+0x3f/0x70
>> [  404.795269]  ? exc_invalid_op+0x13/0x60
>> [  404.799671]  ? asm_exc_invalid_op+0x16/0x20
>> [  404.804461]  ? kernfs_new_node+0x63/0x70
>> [  404.808954]  ? snprintf+0x49/0x70
>> [  404.812762]  __kernfs_create_file+0x30/0xc0
>> [  404.817534]  rdtgroup_add_files+0x6c/0x100
>>
>> Basically kernel says your rdt_root is not initialized. That is the reason
>> I had to move everything to mount time. The rdt_root is created and
>> initialized during the mount and also destroyed during the umount.
>> And I had to move rdt_enable_key check during rdt_root creation.
>>
> 
> ok, thank you for the additional details. I see now how this patch evolved.
> I understand how rdt_root needs to be created/destroyed
> during mount/unmount. If I understand correctly the changes to
> rdt_init_fs_context() was motivated by this line:
> 
> 	ctx->kfc.root = rdt_root;
> 
> ... that prompted you to move rdt_root creation there in order to have
> it present for this assignment and that prompted the
> rdt_enable_key check to follow. Is this correct?

That is correct.

> 
> I am concerned about the changes to rdt_init_fs_context() since it further
> separates the resctrl file management, it breaks the symmetry of the
> key checked and set, and finally these new actions seem unrelated to a function
> named "init_fs_context". I looked at other examples and from what I can tell
> it is not required that ctx->kfc.root be initialized within
> rdt_init_fs_context(). Looks like the value is required by kernfs_get_tree()
> that is called from rdt_get_tree(). For comparison I found cgroup_do_get_tree().
> Note how cgroup_do_get_tree(), within the .get_tree callback,
> initializes kernfs_fs_context.root and then call kernfs_get_tree()? 

Yes. I see that. Thanks for pointing.

> 
> It thus looks to me as though things can be simplified significantly
> if the kernfs_fs_context.root assignment is moved from rdt_init_fs_context()
> to rdt_get_tree(). rdt_get_tree() can then create rdt_root (and add all needed
> files), assign it to kernfs_fs_context.root and call kernfs_get_tree().
> 
> What do you think?

Yes. I think we can do that. Let me try it. Will let you know how it goes.
Thanks for the suggestion.
  

Patch

diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 2f5cdc638607..e03cb01c4742 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -57,6 +57,7 @@  static char last_cmd_status_buf[512];
 struct dentry *debugfs_resctrl;
 
 static bool resctrl_debug;
+static int rdtgroup_setup_root(void);
 
 void rdt_last_cmd_clear(void)
 {
@@ -2515,13 +2516,6 @@  static int rdt_get_tree(struct fs_context *fc)
 
 	cpus_read_lock();
 	mutex_lock(&rdtgroup_mutex);
-	/*
-	 * resctrl file system can only be mounted once.
-	 */
-	if (static_branch_unlikely(&rdt_enable_key)) {
-		ret = -EBUSY;
-		goto out;
-	}
 
 	ret = rdt_enable_ctx(ctx);
 	if (ret < 0)
@@ -2535,9 +2529,15 @@  static int rdt_get_tree(struct fs_context *fc)
 
 	closid_init();
 
+	ret = rdtgroup_add_files(rdtgroup_default.kn, RFTYPE_CTRL_BASE);
+	if (ret)
+		goto out_schemata_free;
+
+	kernfs_activate(rdtgroup_default.kn);
+
 	ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
 	if (ret < 0)
-		goto out_schemata_free;
+		goto out_default;
 
 	if (rdt_mon_capable) {
 		ret = mongroup_create_dir(rdtgroup_default.kn,
@@ -2587,6 +2587,8 @@  static int rdt_get_tree(struct fs_context *fc)
 		kernfs_remove(kn_mongrp);
 out_info:
 	kernfs_remove(kn_info);
+out_default:
+	kernfs_remove(rdtgroup_default.kn);
 out_schemata_free:
 	schemata_list_destroy();
 out_mba:
@@ -2664,10 +2666,23 @@  static const struct fs_context_operations rdt_fs_context_ops = {
 static int rdt_init_fs_context(struct fs_context *fc)
 {
 	struct rdt_fs_context *ctx;
+	int ret;
+
+	/*
+	 * resctrl file system can only be mounted once.
+	 */
+	if (static_branch_unlikely(&rdt_enable_key))
+		return -EBUSY;
+
+	ret = rdtgroup_setup_root();
+	if (ret)
+		return ret;
 
 	ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
-	if (!ctx)
+	if (!ctx) {
+		kernfs_destroy_root(rdt_root);
 		return -ENOMEM;
+	}
 
 	ctx->kfc.root = rdt_root;
 	ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
@@ -2845,6 +2860,9 @@  static void rdt_kill_sb(struct super_block *sb)
 	static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
 	static_branch_disable_cpuslocked(&rdt_mon_enable_key);
 	static_branch_disable_cpuslocked(&rdt_enable_key);
+	/* Remove the default group and cleanup the root */
+	list_del(&rdtgroup_default.rdtgroup_list);
+	kernfs_destroy_root(rdt_root);
 	kernfs_kill_sb(sb);
 	mutex_unlock(&rdtgroup_mutex);
 	cpus_read_unlock();
@@ -3598,10 +3616,8 @@  static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
 	.show_options	= rdtgroup_show_options,
 };
 
-static int __init rdtgroup_setup_root(void)
+static int rdtgroup_setup_root(void)
 {
-	int ret;
-
 	rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
 				      KERNFS_ROOT_CREATE_DEACTIVATED |
 				      KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
@@ -3618,19 +3634,11 @@  static int __init rdtgroup_setup_root(void)
 
 	list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
 
-	ret = rdtgroup_add_files(kernfs_root_to_node(rdt_root), RFTYPE_CTRL_BASE);
-	if (ret) {
-		kernfs_destroy_root(rdt_root);
-		goto out;
-	}
-
 	rdtgroup_default.kn = kernfs_root_to_node(rdt_root);
-	kernfs_activate(rdtgroup_default.kn);
 
-out:
 	mutex_unlock(&rdtgroup_mutex);
 
-	return ret;
+	return 0;
 }
 
 static void domain_destroy_mon_state(struct rdt_domain *d)
@@ -3752,13 +3760,9 @@  int __init rdtgroup_init(void)
 	seq_buf_init(&last_cmd_status, last_cmd_status_buf,
 		     sizeof(last_cmd_status_buf));
 
-	ret = rdtgroup_setup_root();
-	if (ret)
-		return ret;
-
 	ret = sysfs_create_mount_point(fs_kobj, "resctrl");
 	if (ret)
-		goto cleanup_root;
+		return ret;
 
 	ret = register_filesystem(&rdt_fs_type);
 	if (ret)
@@ -3791,8 +3795,6 @@  int __init rdtgroup_init(void)
 
 cleanup_mountpoint:
 	sysfs_remove_mount_point(fs_kobj, "resctrl");
-cleanup_root:
-	kernfs_destroy_root(rdt_root);
 
 	return ret;
 }
@@ -3802,5 +3804,4 @@  void __exit rdtgroup_exit(void)
 	debugfs_remove_recursive(debugfs_resctrl);
 	unregister_filesystem(&rdt_fs_type);
 	sysfs_remove_mount_point(fs_kobj, "resctrl");
-	kernfs_destroy_root(rdt_root);
 }