[v3,10/19] x86/resctrl: Allow resctrl_arch_rmid_read() to sleep

Message ID 20230320172620.18254-11-james.morse@arm.com
State New
Series x86/resctrl: monitored closid+rmid together, separate arch/fs locking

Commit Message

James Morse March 20, 2023, 5:26 p.m. UTC
MPAM's cache occupancy counters can take a little while to settle once
the monitor has been configured. The maximum settling time is described
to the driver via a firmware table. The value could be large enough
that it makes sense to sleep.

To avoid exposing this to resctrl, it should be hidden behind MPAM's
resctrl_arch_rmid_read(). But add_rmid_to_limbo() calls
resctrl_arch_rmid_read() from a non-preemptible context.

add_rmid_to_limbo() opportunistically reads the L3 occupancy counter
on this domain to avoid adding the RMID to limbo if this domain's value
has drifted below resctrl_rmid_realloc_threshold since the limbo handler
last ran. Determining 'this domain' involves disabling preemption to
prevent the thread from being migrated to a CPU in a different domain
between the check and the resctrl_arch_rmid_read() call. The check is
skipped for all remote domains.

Instead, call resctrl_arch_rmid_read() for each domain, and have it
read the arch-specific counter via IPI if it is called on a CPU outside
the target domain. By covering remote domains, this change stops the
limbo handler from being started unnecessarily when a remote domain is
below the threshold.

This also allows resctrl_arch_rmid_read() to sleep.

Tested-by: Shaopeng Tan <tan.shaopeng@fujitsu.com>
Signed-off-by: James Morse <james.morse@arm.com>
---
The alternative is to remove the counter read from this path altogether,
and assume user-space would never try to re-allocate the last RMID before
the limbo handler runs next.
---
 arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 12 +-----
 arch/x86/kernel/cpu/resctrl/monitor.c     | 48 +++++++++++++++--------
 2 files changed, 33 insertions(+), 27 deletions(-)
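
The MPAM driver the commit message refers to is not upstream; a minimal
sketch of the kind of read loop it implies could look like the following.
struct mpam_msc, mpam_read_csu() and max_nrdy_usec are hypothetical names,
not a real API; the point is only that usleep_range() becomes usable once
resctrl never calls into this path from atomic context:

	#include <linux/delay.h>
	#include <linux/jiffies.h>

	/* Hypothetical sketch only: none of these names exist upstream. */
	struct mpam_msc {
		u32 max_nrdy_usec;	/* maximum settling time, from a firmware table */
	};

	/* Returns -EBUSY while the monitor's NRDY bit is still set. */
	static int mpam_read_csu(struct mpam_msc *msc, u64 *val);

	static int mpam_wait_for_csu(struct mpam_msc *msc, u64 *val)
	{
		unsigned long timeout = jiffies + usecs_to_jiffies(msc->max_nrdy_usec);
		int err;

		while ((err = mpam_read_csu(msc, val)) == -EBUSY) {
			if (time_after(jiffies, timeout))
				return -ETIMEDOUT;
			/* Sleeping is now safe: the caller is never atomic. */
			usleep_range(10, 50);
		}

		return err;
	}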
  

Comments

Reinette Chatre March 31, 2023, 11:26 p.m. UTC | #1
Hi James,

On 3/20/2023 10:26 AM, James Morse wrote:

...

>  int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
>  			   u32 closid, u32 rmid, enum resctrl_event_id eventid,
>  			   u64 *val)
>  {
>  	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
>  	struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
> +	struct __rmid_read_arg arg;
>  	struct arch_mbm_state *am;
>  	u64 msr_val, chunks;
> -	int ret;
> +	int err;
>  
> -	if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask))
> -		return -EINVAL;
> +	arg.rmid = rmid;
> +	arg.eventid = eventid;
>  
> -	ret = __rmid_read(rmid, eventid, &msr_val);
> -	if (ret)
> -		return ret;
> +	err = smp_call_function_any(&d->cpu_mask, smp_call_rmid_read, &arg, true);
> +	if (err)
> +		return err;

This seems to break the assumption of expected return values. __mon_event_count()
does: 
	rr->err = resctrl_arch_rmid_read()

and later rdtgroup_mondata_show() only expects -EIO or -EINVAL as errors, with
default of success.


> +	if (arg.err)
> +		return arg.err;
> +	msr_val = arg.msr_val;
>  


Reinette
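
For context, the consumer Reinette refers to renders errors as fixed
strings. Paraphrased from the tail of rdtgroup_mondata_show() as of this
series, any errno other than -EIO or -EINVAL (such as one returned by
smp_call_function_any()) falls through to the last branch and is printed
as if it were a valid counter value:

	if (rr.err == -EIO)
		seq_puts(m, "Error\n");
	else if (rr.err == -EINVAL)
		seq_puts(m, "Unavailable\n");
	else
		seq_printf(m, "%llu\n", rr.val);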
  
James Morse April 27, 2023, 2:12 p.m. UTC | #2
Hi Reinette,

On 01/04/2023 00:26, Reinette Chatre wrote:
> On 3/20/2023 10:26 AM, James Morse wrote:
> 
> ...
> 
>>  int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
>>  			   u32 closid, u32 rmid, enum resctrl_event_id eventid,
>>  			   u64 *val)
>>  {
>>  	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
>>  	struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
>> +	struct __rmid_read_arg arg;
>>  	struct arch_mbm_state *am;
>>  	u64 msr_val, chunks;
>> -	int ret;
>> +	int err;
>>  
>> -	if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask))
>> -		return -EINVAL;
>> +	arg.rmid = rmid;
>> +	arg.eventid = eventid;
>>  
>> -	ret = __rmid_read(rmid, eventid, &msr_val);
>> -	if (ret)
>> -		return ret;
>> +	err = smp_call_function_any(&d->cpu_mask, smp_call_rmid_read, &arg, true);
>> +	if (err)
>> +		return err;
> 
> This seems to break the assumption of expected return values. __mon_event_count()
> does: 
> 	rr->err = resctrl_arch_rmid_read()
> 
> and later rdtgroup_mondata_show() only expects -EIO or -EINVAL as errors, with
> default of success.

Yes, it looks like I dithered on whether cpus_read_lock() should be held over this
function, or whether it should tolerate the error. This path is protected by
rdtgroup_mutex, which means the hotplug callbacks can't run concurrently, so the
error can't occur.

I'll change it to ignore the return value.


Thanks,

James
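
One possible shape for that follow-up, as a sketch of the stated intent
rather than the actual change: since rdtgroup_mutex serialises against the
hotplug callbacks, d->cpu_mask cannot be emptied underneath this function,
so the smp_call_function_any() return value can simply be dropped:

	lockdep_assert_held(&rdtgroup_mutex);

	arg.rmid = rmid;
	arg.eventid = eventid;

	/*
	 * rdtgroup_mutex prevents the hotplug callbacks from running
	 * concurrently, so the IPI always finds an online target CPU
	 * and the return value can be ignored.
	 */
	smp_call_function_any(&d->cpu_mask, smp_call_rmid_read, &arg, true);
	if (arg.err)
		return arg.err;
	msr_val = arg.msr_val;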
  

Patch

diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index b06e86839d00..9161bc95eea7 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -543,16 +543,8 @@  void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
 	rr->val = 0;
 	rr->first = first;
 
-	cpu = get_cpu();
-	if (cpumask_test_cpu(cpu, &d->cpu_mask)) {
-		mon_event_count(rr);
-		put_cpu();
-	} else {
-		put_cpu();
-
-		cpu = cpumask_any_housekeeping(&d->cpu_mask);
-		smp_call_on_cpu(cpu, mon_event_count, rr, false);
-	}
+	cpu = cpumask_any_housekeeping(&d->cpu_mask);
+	smp_call_on_cpu(cpu, mon_event_count, rr, false);
 }
 
 int rdtgroup_mondata_show(struct seq_file *m, void *arg)
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 5e9e876c3409..de72df06b37b 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -253,22 +253,42 @@  static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
 	return chunks >> shift;
 }
 
+struct __rmid_read_arg
+{
+	u32 rmid;
+	enum resctrl_event_id eventid;
+
+	u64 msr_val;
+	int err;
+};
+
+static void smp_call_rmid_read(void *_arg)
+{
+	struct __rmid_read_arg *arg = _arg;
+
+	arg->err = __rmid_read(arg->rmid, arg->eventid, &arg->msr_val);
+}
+
 int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
 			   u32 closid, u32 rmid, enum resctrl_event_id eventid,
 			   u64 *val)
 {
 	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
 	struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
+	struct __rmid_read_arg arg;
 	struct arch_mbm_state *am;
 	u64 msr_val, chunks;
-	int ret;
+	int err;
 
-	if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask))
-		return -EINVAL;
+	arg.rmid = rmid;
+	arg.eventid = eventid;
 
-	ret = __rmid_read(rmid, eventid, &msr_val);
-	if (ret)
-		return ret;
+	err = smp_call_function_any(&d->cpu_mask, smp_call_rmid_read, &arg, true);
+	if (err)
+		return err;
+	if (arg.err)
+		return arg.err;
+	msr_val = arg.msr_val;
 
 	am = get_arch_mbm_state(hw_dom, rmid, eventid);
 	if (am) {
@@ -424,23 +444,18 @@  static void add_rmid_to_limbo(struct rmid_entry *entry)
 {
 	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
 	struct rdt_domain *d;
-	int cpu, err;
 	u64 val = 0;
 	u32 idx;
+	int err;
 
 	idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);
 
 	entry->busy = 0;
-	cpu = get_cpu();
 	list_for_each_entry(d, &r->domains, list) {
-		if (cpumask_test_cpu(cpu, &d->cpu_mask)) {
-			err = resctrl_arch_rmid_read(r, d, entry->closid,
-						     entry->rmid,
-						     QOS_L3_OCCUP_EVENT_ID,
-						     &val);
-			if (err || val <= resctrl_rmid_realloc_threshold)
-				continue;
-		}
+		err = resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
+					     QOS_L3_OCCUP_EVENT_ID, &val);
+		if (err || val <= resctrl_rmid_realloc_threshold)
+			continue;
 
 		/*
 		 * For the first limbo RMID in the domain,
@@ -451,7 +466,6 @@  static void add_rmid_to_limbo(struct rmid_entry *entry)
 		set_bit(idx, d->rmid_busy_llc);
 		entry->busy++;
 	}
-	put_cpu();
 
 	if (entry->busy)
 		rmid_limbo_count++;