[v3,18/19] x86/resctrl: Add cpu offline callback for resctrl work

Message ID 20230320172620.18254-19-james.morse@arm.com
State New
Series x86/resctrl: monitored closid+rmid together, separate arch/fs locking

Commit Message

James Morse March 20, 2023, 5:26 p.m. UTC
  The resctrl architecture specific code may need to free a domain when
a CPU goes offline; it also needs to reset the CPU's PQR_ASSOC register.
The resctrl filesystem code needs to move the overflow and limbo work
to run on a different CPU, and clear this CPU from the cpu_mask of
control and monitor groups.

Currently this is all done in core.c and called from
resctrl_offline_cpu(), making the split between architecture and
filesystem code unclear.

Move the filesystem work into a filesystem helper called
resctrl_offline_cpu(), and rename the one in core.c to
resctrl_arch_offline_cpu().

The rdtgroup_mutex is unlocked and locked again in the call in
preparation for changing the locking rules for the architecture
code.

resctrl_offline_cpu() is called before any of the resources/domains
are updated, and makes use of the exclude_cpu feature that was
previously added.

Tested-by: Shaopeng Tan <tan.shaopeng@fujitsu.com>
Signed-off-by: James Morse <james.morse@arm.com>
---
 arch/x86/kernel/cpu/resctrl/core.c     | 41 ++++----------------------
 arch/x86/kernel/cpu/resctrl/rdtgroup.c | 39 ++++++++++++++++++++++++
 include/linux/resctrl.h                |  1 +
 3 files changed, 45 insertions(+), 36 deletions(-)
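
For reference, a condensed sketch of the resulting offline path (simplified from the
full patch below; only the call ordering is shown):

	static int resctrl_arch_offline_cpu(unsigned int cpu)
	{
		struct rdt_resource *r;

		mutex_lock(&rdtgroup_mutex);
		resctrl_offline_cpu(cpu);	/* filesystem: clear cpu_masks, move limbo/mbm work */

		for_each_capable_rdt_resource(r)
			domain_remove_cpu(cpu, r);	/* architecture: may free the domain */
		clear_closid_rmid(cpu);			/* architecture: reset this CPU's PQR_ASSOC */
		mutex_unlock(&rdtgroup_mutex);

		return 0;
	}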
  

Comments

Ilpo Järvinen March 21, 2023, 3:32 p.m. UTC | #1
On Mon, 20 Mar 2023, James Morse wrote:

> The resctrl architecture specific code may need to free a domain when
> a CPU goes offline, it also needs to reset the CPUs PQR_ASSOC register.
> The resctrl filesystem code needs to move the overflow and limbo work
> to run on a different CPU, and clear this CPU from the cpu_mask of
> control and monitor groups.
> 
> Currently this is all done in core.c and called from
> resctrl_offline_cpu(), making the split between architecture and
> filesystem code unclear.
> 
> Move the filesystem work into a filesystem helper called
> resctrl_offline_cpu(), and rename the one in core.c
> resctrl_arch_offline_cpu().
> 
> The rdtgroup_mutex is unlocked and locked again in the call in
> preparation for changing the locking rules for the architecture
> code.
> 
> resctrl_offline_cpu() is called before any of the resource/domains
> are updated, and makes use of the exclude_cpu feature that was
> previously added.
> 
> Tested-by: Shaopeng Tan <tan.shaopeng@fujitsu.com>
> Signed-off-by: James Morse <james.morse@arm.com>
> ---
>  arch/x86/kernel/cpu/resctrl/core.c     | 41 ++++----------------------
>  arch/x86/kernel/cpu/resctrl/rdtgroup.c | 39 ++++++++++++++++++++++++
>  include/linux/resctrl.h                |  1 +
>  3 files changed, 45 insertions(+), 36 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
> index aafe4b74587c..4e5fc89dab6d 100644
> --- a/arch/x86/kernel/cpu/resctrl/core.c
> +++ b/arch/x86/kernel/cpu/resctrl/core.c
> @@ -578,22 +578,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
>  
>  		return;
>  	}
> -
> -	if (r == &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl) {
> -		if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
> -			cancel_delayed_work(&d->mbm_over);
> -			/*
> -			 * exclude_cpu=-1 as this CPU has already been removed
> -			 * by cpumask_clear_cpu()d
> -			 */

This was added in 17/19 and now removed (not moved) in 18/19. Please avoid 
such back-and-forth churn.
  
Reinette Chatre April 5, 2023, 11:48 p.m. UTC | #2
Hi James,

On 3/20/2023 10:26 AM, James Morse wrote:

> -static int resctrl_offline_cpu(unsigned int cpu)
> -{
> -	struct rdtgroup *rdtgrp;
>  	struct rdt_resource *r;
>  
>  	mutex_lock(&rdtgroup_mutex);
> +	resctrl_offline_cpu(cpu);
> +
>  	for_each_capable_rdt_resource(r)
>  		domain_remove_cpu(cpu, r);
> -	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
> -		if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) {
> -			clear_childcpus(rdtgrp, cpu);
> -			break;
> -		}
> -	}
>  	clear_closid_rmid(cpu);
>  	mutex_unlock(&rdtgroup_mutex);
>  

I find this and the previous patch to be very complicated. It is not clear
to me why resctrl_offline_cpu(cpu) is required to be before offline of domain.
Previous patch would not be needed if the existing order of operations
is maintained.

Reinette
  
James Morse April 27, 2023, 2:20 p.m. UTC | #3
Hi Ilpo,

On 21/03/2023 15:32, Ilpo Järvinen wrote:
> On Mon, 20 Mar 2023, James Morse wrote:
> 
>> The resctrl architecture specific code may need to free a domain when
>> a CPU goes offline, it also needs to reset the CPUs PQR_ASSOC register.
>> The resctrl filesystem code needs to move the overflow and limbo work
>> to run on a different CPU, and clear this CPU from the cpu_mask of
>> control and monitor groups.
>>
>> Currently this is all done in core.c and called from
>> resctrl_offline_cpu(), making the split between architecture and
>> filesystem code unclear.
>>
>> Move the filesystem work into a filesystem helper called
>> resctrl_offline_cpu(), and rename the one in core.c
>> resctrl_arch_offline_cpu().
>>
>> The rdtgroup_mutex is unlocked and locked again in the call in
>> preparation for changing the locking rules for the architecture
>> code.
>>
>> resctrl_offline_cpu() is called before any of the resource/domains
>> are updated, and makes use of the exclude_cpu feature that was
>> previously added.

>> diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
>> index aafe4b74587c..4e5fc89dab6d 100644
>> --- a/arch/x86/kernel/cpu/resctrl/core.c
>> +++ b/arch/x86/kernel/cpu/resctrl/core.c
>> @@ -578,22 +578,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
>>  
>>  		return;
>>  	}
>> -
>> -	if (r == &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl) {
>> -		if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
>> -			cancel_delayed_work(&d->mbm_over);
>> -			/*
>> -			 * exclude_cpu=-1 as this CPU has already been removed
>> -			 * by cpumask_clear_cpu()d
>> -			 */
> 
> This was added in 17/19 and now removed (not moved) in 18/19. Please avoid 
> such back-and-forth churn.

This is the cost of making small incremental changes that should be easier to review.
The intermediate step was a little odd, so it came with a comment. (I normally mark those
as 'temporary', but didn't bother this time as they are adjacent patches.)

If you'd prefer, I can merge these patches together... but from Reinette's feedback it's
likely I'll split them up even more.


Thanks,

James
  
James Morse April 27, 2023, 2:20 p.m. UTC | #4
Hi Reinette,

On 06/04/2023 00:48, Reinette Chatre wrote:
> On 3/20/2023 10:26 AM, James Morse wrote:
> 
>> -static int resctrl_offline_cpu(unsigned int cpu)
>> -{
>> -	struct rdtgroup *rdtgrp;
>>  	struct rdt_resource *r;
>>  
>>  	mutex_lock(&rdtgroup_mutex);
>> +	resctrl_offline_cpu(cpu);
>> +
>>  	for_each_capable_rdt_resource(r)
>>  		domain_remove_cpu(cpu, r);
>> -	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
>> -		if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) {
>> -			clear_childcpus(rdtgrp, cpu);
>> -			break;
>> -		}
>> -	}
>>  	clear_closid_rmid(cpu);
>>  	mutex_unlock(&rdtgroup_mutex);
>>  
> 
> I find this and the previous patch to be very complicated.

It consolidates the parts of this that have nothing to do with the architecture specific code.
The extra work is because the semantics are "this CPU is going away": the callee needs
to not pick 'this CPU' again when updating any structures.
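
As an illustration only, this is the kind of selection the exclude_cpu parameter
enables; the helper below is a made-up sketch, not the one this series adds:

	/*
	 * Sketch only: pick a CPU for deferred work while honouring exclude_cpu.
	 */
	static unsigned int pick_work_cpu(struct rdt_domain *d, int exclude_cpu)
	{
		if (exclude_cpu == RESCTRL_PICK_ANY_CPU)
			return cpumask_any(&d->cpu_mask);

		/* 'this CPU' is going away, never hand work back to it. */
		return cpumask_any_but(&d->cpu_mask, exclude_cpu);
	}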

Ensuring the structures have not yet been modified by the architecture code is the
cleanest interface as there is nothing special about what the arch code provides to the
filesystem here.

I agree it looks like a special case, but only because the existing code is being called
halfway through the tear down, and depends on what the arch code has already done.

Having a single call, where nothing has been changed yet, is the most maintainable option
as it avoids extra hooks, or an incomplete list of what has been torn down and what
hasn't - some of which may be architecture specific.

It also avoids any interaction with how the architecture code chooses to prevent multiple
writers to the domain list - I don't want any of the filesystem code to depend on a lock
held by the architecture specific code.


> It is not clear
> to me why resctrl_offline_cpu(cpu) is required to be before offline of domain.
> Previous patch would not be needed if the existing order of operations
> is maintained.

The existing order is a bit of a soup.

You'd need a resctrl_domain_rebalance_helpers() to move the limbo and mbm workers, but
this would run after the CPU had been removed from the domain. Hopefully the name conveys
that it doesn't always run when a CPU is going offline.
resctrl_offline_cpu() would potentially run after the CPU's domains have been free()d;
depending on what gets added in the future this might be a problem, leading to a
resctrl_pre_offline_cpu() hook.

I worry this strange state leads to extra special-case'd filesystem code, and extra hooks.
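
To make that concrete, a rough sketch of the alternative shape; resctrl_pre_offline_cpu()
and resctrl_domain_rebalance_helpers() are hypothetical names from the paragraphs above
and do not exist in this series:

	static int resctrl_arch_offline_cpu(unsigned int cpu)
	{
		struct rdt_resource *r;

		mutex_lock(&rdtgroup_mutex);
		resctrl_pre_offline_cpu(cpu);	/* hypothetical: must see untouched domains */

		/*
		 * domain_remove_cpu() would have to call the hypothetical
		 * resctrl_domain_rebalance_helpers() to move limbo/mbm work,
		 * after the CPU has already been cleared from the domain and
		 * only when the domain is not being freed.
		 */
		for_each_capable_rdt_resource(r)
			domain_remove_cpu(cpu, r);

		resctrl_offline_cpu(cpu);	/* may run after this CPU's domains were freed */
		clear_closid_rmid(cpu);
		mutex_unlock(&rdtgroup_mutex);

		return 0;
	}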


I can split the consolidation of the filesystem code up in this patch; the
clear_childcpus() and limbo/mbm stuff can be done in separate patches, which might make
it easier on the eye.


Thanks,

James
  
Ilpo Järvinen April 27, 2023, 2:51 p.m. UTC | #5
On Thu, 27 Apr 2023, James Morse wrote:

> Hi Ilpo,
> 
> On 21/03/2023 15:32, Ilpo Järvinen wrote:
> > On Mon, 20 Mar 2023, James Morse wrote:
> > 
> >> The resctrl architecture specific code may need to free a domain when
> >> a CPU goes offline, it also needs to reset the CPUs PQR_ASSOC register.
> >> The resctrl filesystem code needs to move the overflow and limbo work
> >> to run on a different CPU, and clear this CPU from the cpu_mask of
> >> control and monitor groups.
> >>
> >> Currently this is all done in core.c and called from
> >> resctrl_offline_cpu(), making the split between architecture and
> >> filesystem code unclear.
> >>
> >> Move the filesystem work into a filesystem helper called
> >> resctrl_offline_cpu(), and rename the one in core.c
> >> resctrl_arch_offline_cpu().
> >>
> >> The rdtgroup_mutex is unlocked and locked again in the call in
> >> preparation for changing the locking rules for the architecture
> >> code.
> >>
> >> resctrl_offline_cpu() is called before any of the resource/domains
> >> are updated, and makes use of the exclude_cpu feature that was
> >> previously added.
> 
> >> diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
> >> index aafe4b74587c..4e5fc89dab6d 100644
> >> --- a/arch/x86/kernel/cpu/resctrl/core.c
> >> +++ b/arch/x86/kernel/cpu/resctrl/core.c
> >> @@ -578,22 +578,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
> >>  
> >>  		return;
> >>  	}
> >> -
> >> -	if (r == &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl) {
> >> -		if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
> >> -			cancel_delayed_work(&d->mbm_over);
> >> -			/*
> >> -			 * exclude_cpu=-1 as this CPU has already been removed
> >> -			 * by cpumask_clear_cpu()d
> >> -			 */
> > 
> > This was added in 17/19 and now removed (not moved) in 18/19. Please avoid 
> > such back-and-forth churn.
> 
> This is the cost of making small incremental changes that should be easier to review.
> The intermediate step was a little odd, so came with a comment. (I normally mark those as
> 'temporary', but didn't bother this time as they are adjacent patches)

Why not mention the oddity at the end of the changelog then? That keeps the 
diffs clean of temporary comments.

> If you'd prefer, I can merge these patches together... but from 
> Reinette's feedback its likely I'll split them up even more.

I don't prefer merging.
  

Patch

diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index aafe4b74587c..4e5fc89dab6d 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -578,22 +578,6 @@  static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 
 		return;
 	}
-
-	if (r == &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl) {
-		if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
-			cancel_delayed_work(&d->mbm_over);
-			/*
-			 * exclude_cpu=-1 as this CPU has already been removed
-			 * by cpumask_clear_cpu()d
-			 */
-			mbm_setup_overflow_handler(d, 0, RESCTRL_PICK_ANY_CPU);
-		}
-		if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
-		    has_busy_rmid(r, d)) {
-			cancel_delayed_work(&d->cqm_limbo);
-			cqm_setup_limbo_handler(d, 0, RESCTRL_PICK_ANY_CPU);
-		}
-	}
 }
 
 static void clear_closid_rmid(int cpu)
@@ -623,31 +607,15 @@  static int resctrl_arch_online_cpu(unsigned int cpu)
 	return err;
 }
 
-static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
+static int resctrl_arch_offline_cpu(unsigned int cpu)
 {
-	struct rdtgroup *cr;
-
-	list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) {
-		if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask)) {
-			break;
-		}
-	}
-}
-
-static int resctrl_offline_cpu(unsigned int cpu)
-{
-	struct rdtgroup *rdtgrp;
 	struct rdt_resource *r;
 
 	mutex_lock(&rdtgroup_mutex);
+	resctrl_offline_cpu(cpu);
+
 	for_each_capable_rdt_resource(r)
 		domain_remove_cpu(cpu, r);
-	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
-		if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) {
-			clear_childcpus(rdtgrp, cpu);
-			break;
-		}
-	}
 	clear_closid_rmid(cpu);
 	mutex_unlock(&rdtgroup_mutex);
 
@@ -970,7 +938,8 @@  static int __init resctrl_late_init(void)
 
 	state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
 				  "x86/resctrl/cat:online:",
-				  resctrl_arch_online_cpu, resctrl_offline_cpu);
+				  resctrl_arch_online_cpu,
+				  resctrl_arch_offline_cpu);
 	if (state < 0)
 		return state;
 
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index bf206bdb21ee..c27ec56c6c60 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -3710,6 +3710,45 @@  int resctrl_online_cpu(unsigned int cpu)
 	return 0;
 }
 
+static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
+{
+	struct rdtgroup *cr;
+
+	list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) {
+		if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask))
+			break;
+	}
+}
+
+void resctrl_offline_cpu(unsigned int cpu)
+{
+	struct rdt_domain *d;
+	struct rdtgroup *rdtgrp;
+	struct rdt_resource *l3 = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+
+	lockdep_assert_held(&rdtgroup_mutex);
+
+	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
+		if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) {
+			clear_childcpus(rdtgrp, cpu);
+			break;
+		}
+	}
+
+	d = get_domain_from_cpu(cpu, l3);
+	if (d) {
+		if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
+			cancel_delayed_work(&d->mbm_over);
+			mbm_setup_overflow_handler(d, 0, cpu);
+		}
+		if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
+		    has_busy_rmid(l3, d)) {
+			cancel_delayed_work(&d->cqm_limbo);
+			cqm_setup_limbo_handler(d, 0, cpu);
+		}
+	}
+}
+
 /*
  * rdtgroup_init - rdtgroup initialization
  *
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 3ea7d618f33f..f053527aaa5b 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -226,6 +226,7 @@  u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d,
 int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d);
 void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d);
 int resctrl_online_cpu(unsigned int cpu);
+void resctrl_offline_cpu(unsigned int cpu);
 
 /**
  * resctrl_arch_rmid_read() - Read the eventid counter corresponding to rmid