[-next] sched/fair: Fix task migrated to isolated cpus

Message ID 20240301023109.336707-1-zhengzucheng@huawei.com
State New
Headers
Series [-next] sched/fair: Fix task migrated to isolated cpus |

Commit Message

zhengzucheng March 1, 2024, 2:31 a.m. UTC
  On the wakeup path with hyperthreading, select cpu only looks at
task->cpus_ptr to see if the task can run on the target cpu. If the isolcpus
kernel parameter is set and the isolated cpus are still part of the mask
task->cpus_ptr, tasks can be migrated to the isolated cpus.

Steps to reproduce on my 32-CPU hyperthreaded machine:
1. with boot parameter: "isolcpus=0,1"
   (thread lists: 0,16 and 1,17)
2. cgcreate -g cpuset:test
   echo 0-31 > /sys/fs/cgroup/cpuset/test/cpuset.cpus
   echo 0 > /sys/fs/cgroup/cpuset/test/cpuset.mems
   cgexec -g cpuset:test "test_threads"
3. some threads will be migrated to the isolated cpu0/1.

Fix it by checking the valid scheduling domain mask in select_idle_smt() and
select_idle_core().

Fixes: 9fe1f127b913 ("sched/fair: Merge select_idle_core/cpu()")
Fixes: 3e6efe87cd5c ("sched/fair: Remove redundant check in select_idle_smt()")
Cc: stable@vger.kernel.org # v5.12+
Signed-off-by: Zheng Zucheng <zhengzucheng@huawei.com>
---
 kernel/sched/fair.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)
  

Comments

Abel Wu March 1, 2024, 3:28 a.m. UTC | #1
Hi Zucheng, thanks for the bugfix. But it has already been addressed
by the following patches in tip/sched/core:

8aeaffef8c6e ("sched/fair: Take the scheduling domain into account in select_idle_smt()")
23d04d8c6b8e ("sched/fair: Take the scheduling domain into account in select_idle_core()")

On 3/1/24 10:31 AM, Zheng Zucheng Wrote:
> On the wakeup path with hyperthreading, select cpu only looks at
> task->cpus_ptr to see if the task can run on the target cpu. If isolcpus
> kernel parameter is set, and isolated cpus will be part of mask
> task->cpus_ptr, tasks were migrated to our isolated cpus.
> 
> Steps to reproduce on my 32-CPU hyperthreads machine:
> 1. with boot parameter: "isolcpus=0,1"
>     (thread lists: 0,16 and 1,17)
> 2. cgcreate -g cpuset:test
>     echo 0-31 > /sys/fs/cgroup/cpuset/test/cpuset.cpus
>     echo 0 > /sys/fs/cgroup/cpuset/test/cpuset.mems
>     cgexec -g cpuset:test "test_threads"
> 3. some threads will be migrated to the isolated cpu0/1.
> 
> Fix it by checking the valid domain mask in select_idle_smt() and
> select_idle_core()
> 
> Fixes: 9fe1f127b913 ("sched/fair: Merge select_idle_core/cpu()")
> Fixes: 3e6efe87cd5c ("sched/fair: Remove redundant check in select_idle_smt()")
> Cc: stable@vger.kernel.org # v5.12+
> Signed-off-by: Zheng Zucheng <zhengzucheng@huawei.com>
> ---
>   kernel/sched/fair.c | 12 ++++++------
>   1 file changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 533547e3c90a..e6552c77e0f1 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -7289,7 +7289,7 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu
>   		if (!available_idle_cpu(cpu)) {
>   			idle = false;
>   			if (*idle_cpu == -1) {
> -				if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, p->cpus_ptr)) {
> +				if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, cpus)) {
>   					*idle_cpu = cpu;
>   					break;
>   				}
> @@ -7297,7 +7297,7 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu
>   			}
>   			break;
>   		}
> -		if (*idle_cpu == -1 && cpumask_test_cpu(cpu, p->cpus_ptr))
> +		if (*idle_cpu == -1 && cpumask_test_cpu(cpu, cpus))
>   			*idle_cpu = cpu;
>   	}
>   
> @@ -7311,12 +7311,12 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu
>   /*
>    * Scan the local SMT mask for idle CPUs.
>    */
> -static int select_idle_smt(struct task_struct *p, int target)
> +static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
>   {
>   	int cpu;
>   
>   	for_each_cpu_and(cpu, cpu_smt_mask(target), p->cpus_ptr) {
> -		if (cpu == target)
> +		if (cpu == target || !cpumask_test_cpu(cpu, sched_domain_span(sd)))
>   			continue;
>   		if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
>   			return cpu;
> @@ -7341,7 +7341,7 @@ static inline int select_idle_core(struct task_struct *p, int core, struct cpuma
>   	return __select_idle_cpu(core, p);
>   }
>   
> -static inline int select_idle_smt(struct task_struct *p, int target)
> +static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
>   {
>   	return -1;
>   }
> @@ -7591,7 +7591,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
>   		has_idle_core = test_idle_cores(target);
>   
>   		if (!has_idle_core && cpus_share_cache(prev, target)) {
> -			i = select_idle_smt(p, prev);
> +			i = select_idle_smt(p, sd, prev);
>   			if ((unsigned int)i < nr_cpumask_bits)
>   				return i;
>   		}
  
zhengzucheng March 1, 2024, 8:54 a.m. UTC | #2
Ah yes indeed. I didn't notice that it was fixed, thanks!

在 2024/3/1 11:28, Abel Wu 写道:
> Hi Zucheng, thanks for the bugfix. But it has already been addressed
> by the following patches in tip/sched/core:
>
> 8aeaffef8c6e ("sched/fair: Take the scheduling domain into account in 
> select_idle_smt()")
> 23d04d8c6b8e ("sched/fair: Take the scheduling domain into account in 
> select_idle_core()")
>
> On 3/1/24 10:31 AM, Zheng Zucheng Wrote:
>> On the wakeup path with hyperthreading, select cpu only looks at
>> task->cpus_ptr to see if the task can run on the target cpu. If isolcpus
>> kernel parameter is set, and isolated cpus will be part of mask
>> task->cpus_ptr, tasks were migrated to our isolated cpus.
>>
>> Steps to reproduce on my 32-CPU hyperthreads machine:
>> 1. with boot parameter: "isolcpus=0,1"
>>     (thread lists: 0,16 and 1,17)
>> 2. cgcreate -g cpuset:test
>>     echo 0-31 > /sys/fs/cgroup/cpuset/test/cpuset.cpus
>>     echo 0 > /sys/fs/cgroup/cpuset/test/cpuset.mems
>>     cgexec -g cpuset:test "test_threads"
>> 3. some threads will be migrated to the isolated cpu0/1.
>>
>> Fix it by checking the valid domain mask in select_idle_smt() and
>> select_idle_core()
>>
>> Fixes: 9fe1f127b913 ("sched/fair: Merge select_idle_core/cpu()")
>> Fixes: 3e6efe87cd5c ("sched/fair: Remove redundant check in 
>> select_idle_smt()")
>> Cc: stable@vger.kernel.org # v5.12+
>> Signed-off-by: Zheng Zucheng <zhengzucheng@huawei.com>
>> ---
>>   kernel/sched/fair.c | 12 ++++++------
>>   1 file changed, 6 insertions(+), 6 deletions(-)
>>
>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>> index 533547e3c90a..e6552c77e0f1 100644
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -7289,7 +7289,7 @@ static int select_idle_core(struct task_struct 
>> *p, int core, struct cpumask *cpu
>>           if (!available_idle_cpu(cpu)) {
>>               idle = false;
>>               if (*idle_cpu == -1) {
>> -                if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, 
>> p->cpus_ptr)) {
>> +                if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, 
>> cpus)) {
>>                       *idle_cpu = cpu;
>>                       break;
>>                   }
>> @@ -7297,7 +7297,7 @@ static int select_idle_core(struct task_struct 
>> *p, int core, struct cpumask *cpu
>>               }
>>               break;
>>           }
>> -        if (*idle_cpu == -1 && cpumask_test_cpu(cpu, p->cpus_ptr))
>> +        if (*idle_cpu == -1 && cpumask_test_cpu(cpu, cpus))
>>               *idle_cpu = cpu;
>>       }
>>   @@ -7311,12 +7311,12 @@ static int select_idle_core(struct 
>> task_struct *p, int core, struct cpumask *cpu
>>   /*
>>    * Scan the local SMT mask for idle CPUs.
>>    */
>> -static int select_idle_smt(struct task_struct *p, int target)
>> +static int select_idle_smt(struct task_struct *p, struct 
>> sched_domain *sd, int target)
>>   {
>>       int cpu;
>>         for_each_cpu_and(cpu, cpu_smt_mask(target), p->cpus_ptr) {
>> -        if (cpu == target)
>> +        if (cpu == target || !cpumask_test_cpu(cpu, 
>> sched_domain_span(sd)))
>>               continue;
>>           if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
>>               return cpu;
>> @@ -7341,7 +7341,7 @@ static inline int select_idle_core(struct 
>> task_struct *p, int core, struct cpuma
>>       return __select_idle_cpu(core, p);
>>   }
>>   -static inline int select_idle_smt(struct task_struct *p, int target)
>> +static inline int select_idle_smt(struct task_struct *p, struct 
>> sched_domain *sd, int target)
>>   {
>>       return -1;
>>   }
>> @@ -7591,7 +7591,7 @@ static int select_idle_sibling(struct 
>> task_struct *p, int prev, int target)
>>           has_idle_core = test_idle_cores(target);
>>             if (!has_idle_core && cpus_share_cache(prev, target)) {
>> -            i = select_idle_smt(p, prev);
>> +            i = select_idle_smt(p, sd, prev);
>>               if ((unsigned int)i < nr_cpumask_bits)
>>                   return i;
>>           }
>
> .
  

Patch

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 533547e3c90a..e6552c77e0f1 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7289,7 +7289,7 @@  static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu
 		if (!available_idle_cpu(cpu)) {
 			idle = false;
 			if (*idle_cpu == -1) {
-				if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, p->cpus_ptr)) {
+				if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, cpus)) {
 					*idle_cpu = cpu;
 					break;
 				}
@@ -7297,7 +7297,7 @@  static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu
 			}
 			break;
 		}
-		if (*idle_cpu == -1 && cpumask_test_cpu(cpu, p->cpus_ptr))
+		if (*idle_cpu == -1 && cpumask_test_cpu(cpu, cpus))
 			*idle_cpu = cpu;
 	}
 
@@ -7311,12 +7311,12 @@  static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu
 /*
  * Scan the local SMT mask for idle CPUs.
  */
-static int select_idle_smt(struct task_struct *p, int target)
+static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
 {
 	int cpu;
 
 	for_each_cpu_and(cpu, cpu_smt_mask(target), p->cpus_ptr) {
-		if (cpu == target)
+		if (cpu == target || !cpumask_test_cpu(cpu, sched_domain_span(sd)))
 			continue;
 		if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
 			return cpu;
@@ -7341,7 +7341,7 @@  static inline int select_idle_core(struct task_struct *p, int core, struct cpuma
 	return __select_idle_cpu(core, p);
 }
 
-static inline int select_idle_smt(struct task_struct *p, int target)
+static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
 {
 	return -1;
 }
@@ -7591,7 +7591,7 @@  static int select_idle_sibling(struct task_struct *p, int prev, int target)
 		has_idle_core = test_idle_cores(target);
 
 		if (!has_idle_core && cpus_share_cache(prev, target)) {
-			i = select_idle_smt(p, prev);
+			i = select_idle_smt(p, sd, prev);
 			if ((unsigned int)i < nr_cpumask_bits)
 				return i;
 		}