[v6,04/24] x86/resctrl: Move rmid allocation out of mkdir_rdt_prepare()

Message ID 20230914172138.11977-5-james.morse@arm.com
State New
Headers
Series x86/resctrl: monitored closid+rmid together, separate arch/fs locking |

Commit Message

James Morse Sept. 14, 2023, 5:21 p.m. UTC
  RMIDs are allocated for each monitor or control group directory, because
each of these needs its own RMID. For control groups,
rdtgroup_mkdir_ctrl_mon() later goes on to allocate the CLOSID.

MPAM's equivalent of RMID is not an independent number, so can't be
allocated until the CLOSID is known. An RMID allocation for one CLOSID
may fail, whereas another may succeed depending on how many monitor
groups a control group has.

The RMID allocation needs to move to be after the CLOSID has been
allocated.

Move the RMID allocation out of mkdir_rdt_prepare() to occur in its caller,
after the mkdir_rdt_prepare() call. This allows the RMID allocator to
know the CLOSID.

Reviewed-by: Shaopeng Tan <tan.shaopeng@fujitsu.com>
Tested-by: Shaopeng Tan <tan.shaopeng@fujitsu.com>
Tested-By: Peter Newman <peternewman@google.com>
Signed-off-by: James Morse <james.morse@arm.com>
---
Changes since v2:
 * Moved kernfs_activate() later to preserve atomicity of files being visible

Changes since v5:
 * Renamed out_id_free as out_closid_free.
---
 arch/x86/kernel/cpu/resctrl/rdtgroup.c | 35 +++++++++++++++++++-------
 1 file changed, 26 insertions(+), 9 deletions(-)
  

Comments

Reinette Chatre Oct. 3, 2023, 9:07 p.m. UTC | #1
Hi James,

On 9/14/2023 10:21 AM, James Morse wrote:
> RMID are allocated for each monitor or control group directory, because
> each of these needs its own RMID. For control groups,
> rdtgroup_mkdir_ctrl_mon() later goes on to allocate the CLOSID.
> 
> MPAM's equivalent of RMID is not an independent number, so can't be
> allocated until the CLOSID is known. An RMID allocation for one CLOSID
> may fail, whereas another may succeed depending on how many monitor
> groups a control group has.
> 
> The RMID allocation needs to move to be after the CLOSID has been
> allocated.
> 
> Move the RMID allocation out of mkdir_rdt_prepare() to occur in its caller,
> after the mkdir_rdt_prepare() call. This allows the RMID allocator to
> know the CLOSID.
> 
> Reviewed-by: Shaopeng Tan <tan.shaopeng@fujitsu.com>
> Tested-by: Shaopeng Tan <tan.shaopeng@fujitsu.com>
> Tested-By: Peter Newman <peternewman@google.com>
> Signed-off-by: James Morse <james.morse@arm.com>
> ---

Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>

Reinette
  
Moger, Babu Oct. 4, 2023, 6:01 p.m. UTC | #2
Hi James,

On 9/14/23 12:21, James Morse wrote:
> RMID are allocated for each monitor or control group directory, because
> each of these needs its own RMID. For control groups,
> rdtgroup_mkdir_ctrl_mon() later goes on to allocate the CLOSID.
> 
> MPAM's equivalent of RMID is not an independent number, so can't be
> allocated until the CLOSID is known. An RMID allocation for one CLOSID
> may fail, whereas another may succeed depending on how many monitor
> groups a control group has.
> 
> The RMID allocation needs to move to be after the CLOSID has been
> allocated.
> 
> Move the RMID allocation out of mkdir_rdt_prepare() to occur in its caller,
> after the mkdir_rdt_prepare() call. This allows the RMID allocator to
> know the CLOSID.
> 
> Reviewed-by: Shaopeng Tan <tan.shaopeng@fujitsu.com>
> Tested-by: Shaopeng Tan <tan.shaopeng@fujitsu.com>
> Tested-By: Peter Newman <peternewman@google.com>
> Signed-off-by: James Morse <james.morse@arm.com>
> ---
> Changes since v2:
>  * Moved kernfs_activate() later to preserve atomicity of files being visible
> 
> Changes since v5:
>  * Renamed out_id_free as out_closid_free.
> ---
>  arch/x86/kernel/cpu/resctrl/rdtgroup.c | 35 +++++++++++++++++++-------
>  1 file changed, 26 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> index 7a7369a323b5..d25cb8c9a20e 100644
> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> @@ -3189,6 +3189,12 @@ static int mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp)
>  	return 0;
>  }
>  
> +static void mkdir_rdt_prepare_rmid_free(struct rdtgroup *rgrp)
> +{
> +	if (rdt_mon_capable)
> +		free_rmid(rgrp->mon.rmid);
> +}
> +
>  static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
>  			     const char *name, umode_t mode,
>  			     enum rdt_group_type rtype, struct rdtgroup **r)
> @@ -3254,12 +3260,6 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
>  		goto out_destroy;
>  	}
>  
> -	ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
> -	if (ret)
> -		goto out_destroy;
> -
> -	kernfs_activate(kn);

You should not remove "kernfs_activate(kn);" from here (only the last line).

kernfs_create_dir is called in this function.

/* kernfs creates the directory for rdtgrp */
 kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp);


There should be matching kernfs_activate.

> -
>  	/*
>  	 * The caller unlocks the parent_kn upon success.
>  	 */
> @@ -3278,7 +3278,6 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
>  static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
>  {
>  	kernfs_remove(rgrp->kn);
> -	free_rmid(rgrp->mon.rmid);
>  	rdtgroup_remove(rgrp);
>  }
>  
> @@ -3300,12 +3299,21 @@ static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
>  	prgrp = rdtgrp->mon.parent;
>  	rdtgrp->closid = prgrp->closid;
>  
> +	ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
> +	if (ret) {
> +		mkdir_rdt_prepare_clean(rdtgrp);
> +		goto out_unlock;
> +	}
> +
> +	kernfs_activate(rdtgrp->kn);

I don't see the need for this. There is kernfs_activate inside
mkdir_rdt_prepare_rmid_alloc (mkdir_rdt_prepare_rmid_alloc
->mkdir_mondata_all)  for all the files created.
Also mkdir_rdt_prepare already has kernfs_activate for the files it created.


> +
>  	/*
>  	 * Add the rdtgrp to the list of rdtgrps the parent
>  	 * ctrl_mon group has to track.
>  	 */
>  	list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
>  
> +out_unlock:
>  	rdtgroup_kn_unlock(parent_kn);
>  	return ret;
>  }
> @@ -3336,9 +3344,16 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
>  	ret = 0;
>  
>  	rdtgrp->closid = closid;
> +
> +	ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
> +	if (ret)
> +		goto out_closid_free;
> +
> +	kernfs_activate(rdtgrp->kn);
> +

Same as above.

>  	ret = rdtgroup_init_alloc(rdtgrp);
>  	if (ret < 0)
> -		goto out_id_free;
> +		goto out_rmid_free;
>  
>  	list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
>  
> @@ -3358,7 +3373,9 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
>  
>  out_del_list:
>  	list_del(&rdtgrp->rdtgroup_list);
> -out_id_free:
> +out_rmid_free:
> +	mkdir_rdt_prepare_rmid_free(rdtgrp);
> +out_closid_free:
>  	closid_free(closid);
>  out_common_fail:
>  	mkdir_rdt_prepare_clean(rdtgrp);
  
James Morse Oct. 5, 2023, 5:06 p.m. UTC | #3
Hi Babu,

On 04/10/2023 19:01, Moger, Babu wrote:
> On 9/14/23 12:21, James Morse wrote:
>> RMID are allocated for each monitor or control group directory, because
>> each of these needs its own RMID. For control groups,
>> rdtgroup_mkdir_ctrl_mon() later goes on to allocate the CLOSID.
>>
>> MPAM's equivalent of RMID is not an independent number, so can't be
>> allocated until the CLOSID is known. An RMID allocation for one CLOSID
>> may fail, whereas another may succeed depending on how many monitor
>> groups a control group has.
>>
>> The RMID allocation needs to move to be after the CLOSID has been
>> allocated.
>>
>> Move the RMID allocation out of mkdir_rdt_prepare() to occur in its caller,
>> after the mkdir_rdt_prepare() call. This allows the RMID allocator to
>> know the CLOSID.

>> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>> index 7a7369a323b5..d25cb8c9a20e 100644
>> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>> @@ -3189,6 +3189,12 @@ static int mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp)
>>  	return 0;
>>  }
>>  
>> +static void mkdir_rdt_prepare_rmid_free(struct rdtgroup *rgrp)
>> +{
>> +	if (rdt_mon_capable)
>> +		free_rmid(rgrp->mon.rmid);
>> +}
>> +
>>  static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
>>  			     const char *name, umode_t mode,
>>  			     enum rdt_group_type rtype, struct rdtgroup **r)
>> @@ -3254,12 +3260,6 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
>>  		goto out_destroy;
>>  	}
>>  
>> -	ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
>> -	if (ret)
>> -		goto out_destroy;
>> -
>> -	kernfs_activate(kn);
> 
> You should not remove "kernfs_activate(kn); from here (only the last line).
> 
> kernfs_create_dir is called in this function.
> 
> /* kernfs creates the directory for rdtgrp */
>  kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp);
> 
> 
> There should be matching kernfs_activate.

I think your point is kernfs_activate() should have been called by the time
mkdir_rdt_prepare() returns because it creates other directories. I don't think this
matters because kernfs_activate() is a tree operation. Sure, the control/monitor group
directory isn't visible once mkdir_rdt_prepare() returns, but by the time either of its
two callers return, changes to the directory tree have been activated.

Moving these lines is to ensure user-space doesn't see the control/monitor group as
existing without the mon_data directory that is created by mkdir_rdt_prepare_rmid_alloc().


>> -
>>  	/*
>>  	 * The caller unlocks the parent_kn upon success.
>>  	 */
>> @@ -3278,7 +3278,6 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
>>  static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
>>  {
>>  	kernfs_remove(rgrp->kn);
>> -	free_rmid(rgrp->mon.rmid);
>>  	rdtgroup_remove(rgrp);
>>  }
>>  
>> @@ -3300,12 +3299,21 @@ static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
>>  	prgrp = rdtgrp->mon.parent;
>>  	rdtgrp->closid = prgrp->closid;
>>  
>> +	ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
>> +	if (ret) {
>> +		mkdir_rdt_prepare_clean(rdtgrp);
>> +		goto out_unlock;
>> +	}
>> +
>> +	kernfs_activate(rdtgrp->kn);
> 
> I dont see the need for this. There is kernfs_activate  inside
> mkdir_rdt_prepare_rmid_alloc (mkdir_rdt_prepare_rmid_alloc
> ->mkdir_mondata_all)  for all the files created.

> Also mkdir_rdt_prepare already has kernfs_activate for the files it created.

It does, and this makes the mon_data directory visible in the parent control/monitor group
- but that control/monitor group isn't visible until this kernfs_activate(rdtgrp->kn)
makes it visible. The scope of these tree operations is different.

Looking at this again, there is an existing problem: the mon_groups directory is not
visible until after the control/monitor group is visible. Worse, if the mon_groups
directory creation fails, the control/monitor group is removed. Chances are no-one is
depending on this.

I do think ultimately these kernfs_activate() calls should be moved to the end of the
syscall helpers that change the directory structure. This would stop things being briefly
visible.



Thanks!

James
  

Patch

diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 7a7369a323b5..d25cb8c9a20e 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -3189,6 +3189,12 @@  static int mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp)
 	return 0;
 }
 
+static void mkdir_rdt_prepare_rmid_free(struct rdtgroup *rgrp)
+{
+	if (rdt_mon_capable)
+		free_rmid(rgrp->mon.rmid);
+}
+
 static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
 			     const char *name, umode_t mode,
 			     enum rdt_group_type rtype, struct rdtgroup **r)
@@ -3254,12 +3260,6 @@  static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
 		goto out_destroy;
 	}
 
-	ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
-	if (ret)
-		goto out_destroy;
-
-	kernfs_activate(kn);
-
 	/*
 	 * The caller unlocks the parent_kn upon success.
 	 */
@@ -3278,7 +3278,6 @@  static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
 static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
 {
 	kernfs_remove(rgrp->kn);
-	free_rmid(rgrp->mon.rmid);
 	rdtgroup_remove(rgrp);
 }
 
@@ -3300,12 +3299,21 @@  static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
 	prgrp = rdtgrp->mon.parent;
 	rdtgrp->closid = prgrp->closid;
 
+	ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
+	if (ret) {
+		mkdir_rdt_prepare_clean(rdtgrp);
+		goto out_unlock;
+	}
+
+	kernfs_activate(rdtgrp->kn);
+
 	/*
 	 * Add the rdtgrp to the list of rdtgrps the parent
 	 * ctrl_mon group has to track.
 	 */
 	list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
 
+out_unlock:
 	rdtgroup_kn_unlock(parent_kn);
 	return ret;
 }
@@ -3336,9 +3344,16 @@  static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
 	ret = 0;
 
 	rdtgrp->closid = closid;
+
+	ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
+	if (ret)
+		goto out_closid_free;
+
+	kernfs_activate(rdtgrp->kn);
+
 	ret = rdtgroup_init_alloc(rdtgrp);
 	if (ret < 0)
-		goto out_id_free;
+		goto out_rmid_free;
 
 	list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
 
@@ -3358,7 +3373,9 @@  static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
 
 out_del_list:
 	list_del(&rdtgrp->rdtgroup_list);
-out_id_free:
+out_rmid_free:
+	mkdir_rdt_prepare_rmid_free(rdtgrp);
+out_closid_free:
 	closid_free(closid);
 out_common_fail:
 	mkdir_rdt_prepare_clean(rdtgrp);