[3/6] sched/cpuset: Keep track of SCHED_DEADLINE task in cpusets

Message ID 20230329125558.255239-4-juri.lelli@redhat.com
State New
Series sched/deadline: cpuset: Rework DEADLINE bandwidth restoration

Commit Message

Juri Lelli March 29, 2023, 12:55 p.m. UTC
  Qais reported that iterating over all tasks when rebuilding root domains,
to find out which ones are DEADLINE and need their bandwidth correctly
restored on such root domains, can be a costly operation (10+ ms delays
on suspend-resume).

To fix the problem, keep track of the number of DEADLINE tasks belonging
to each cpuset, and then use this information (in a follow-up patch) to
only perform the above iteration if DEADLINE tasks are actually present
in the cpuset for which a corresponding root domain is being rebuilt.

Reported-by: Qais Yousef <qyousef@layalina.io>
Link: https://lore.kernel.org/lkml/20230206221428.2125324-1-qyousef@layalina.io/
Signed-off-by: Juri Lelli <juri.lelli@redhat.com>
---
 include/linux/cpuset.h  |  4 ++++
 kernel/cgroup/cgroup.c  |  4 ++++
 kernel/cgroup/cpuset.c  | 25 +++++++++++++++++++++++++
 kernel/sched/deadline.c | 14 ++++++++++++++
 4 files changed, 47 insertions(+)
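
For context (not part of the series): a task typically enters SCHED_DEADLINE
via sched_setattr(2), which is the transition that reaches switched_to_dl()
and, with this patch, inc_dl_tasks_cs(). A minimal userspace sketch follows;
the struct layout mirrors the UAPI definition, the runtime/deadline/period
values are arbitrary illustration, and the call needs CAP_SYS_NICE/root to
succeed:

/* gcc -o dl_task dl_task.c */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

#define SCHED_DEADLINE	6	/* value from include/uapi/linux/sched.h */

struct sched_attr {		/* base layout from include/uapi/linux/sched/types.h */
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;
	uint32_t sched_priority;
	uint64_t sched_runtime;
	uint64_t sched_deadline;
	uint64_t sched_period;
};

int main(void)
{
	struct sched_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.sched_policy   = SCHED_DEADLINE;
	attr.sched_runtime  =  10 * 1000 * 1000;	/*  10 ms */
	attr.sched_deadline =  30 * 1000 * 1000;	/*  30 ms */
	attr.sched_period   = 100 * 1000 * 1000;	/* 100 ms */

	/* pid 0 == current task; this is what ends up in switched_to_dl() */
	if (syscall(SYS_sched_setattr, 0, &attr, 0)) {
		perror("sched_setattr");
		return 1;
	}

	/* with this patch, the task is now counted in its cpuset's nr_deadline_tasks */
	return 0;
}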
  

Comments

Qais Yousef April 4, 2023, 8:06 p.m. UTC | #1
On 03/29/23 14:55, Juri Lelli wrote:
> Qais reported that iterating over all tasks when rebuilding root domains
> for finding out which ones are DEADLINE and need their bandwidth
> correctly restored on such root domains can be a costly operation (10+
> ms delays on suspend-resume).
> 
> To fix the problem keep track of the number of DEADLINE tasks belonging
> to each cpuset and then use this information (followup patch) to only
> perform the above iteration if DEADLINE tasks are actually present in
> the cpuset for which a corresponding root domain is being rebuilt.
> 
> Reported-by: Qais Yousef <qyousef@layalina.io>
> Link: https://lore.kernel.org/lkml/20230206221428.2125324-1-qyousef@layalina.io/
> Signed-off-by: Juri Lelli <juri.lelli@redhat.com>
> ---

Reviewed-by: Qais Yousef <qyousef@layalina.io>
Tested-by: Qais Yousef <qyousef@layalina.io>


Thanks!

--
Qais Yousef

>  include/linux/cpuset.h  |  4 ++++
>  kernel/cgroup/cgroup.c  |  4 ++++
>  kernel/cgroup/cpuset.c  | 25 +++++++++++++++++++++++++
>  kernel/sched/deadline.c | 14 ++++++++++++++
>  4 files changed, 47 insertions(+)
> 
> diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
> index 355f796c5f07..0348dba5680e 100644
> --- a/include/linux/cpuset.h
> +++ b/include/linux/cpuset.h
> @@ -71,6 +71,8 @@ extern void cpuset_init_smp(void);
>  extern void cpuset_force_rebuild(void);
>  extern void cpuset_update_active_cpus(void);
>  extern void cpuset_wait_for_hotplug(void);
> +extern void inc_dl_tasks_cs(struct task_struct *task);
> +extern void dec_dl_tasks_cs(struct task_struct *task);
>  extern void cpuset_lock(void);
>  extern void cpuset_unlock(void);
>  extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
> @@ -196,6 +198,8 @@ static inline void cpuset_update_active_cpus(void)
>  
>  static inline void cpuset_wait_for_hotplug(void) { }
>  
> +static inline void inc_dl_tasks_cs(struct task_struct *task) { }
> +static inline void dec_dl_tasks_cs(struct task_struct *task) { }
>  static inline void cpuset_lock(void) { }
>  static inline void cpuset_unlock(void) { }
>  
> diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
> index 935e8121b21e..ff27b2d2bf0b 100644
> --- a/kernel/cgroup/cgroup.c
> +++ b/kernel/cgroup/cgroup.c
> @@ -57,6 +57,7 @@
>  #include <linux/file.h>
>  #include <linux/fs_parser.h>
>  #include <linux/sched/cputime.h>
> +#include <linux/sched/deadline.h>
>  #include <linux/psi.h>
>  #include <net/sock.h>
>  
> @@ -6673,6 +6674,9 @@ void cgroup_exit(struct task_struct *tsk)
>  	list_add_tail(&tsk->cg_list, &cset->dying_tasks);
>  	cset->nr_tasks--;
>  
> +	if (dl_task(tsk))
> +		dec_dl_tasks_cs(tsk);
> +
>  	WARN_ON_ONCE(cgroup_task_frozen(tsk));
>  	if (unlikely(!(tsk->flags & PF_KTHREAD) &&
>  		     test_bit(CGRP_FREEZE, &task_dfl_cgroup(tsk)->flags)))
> diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
> index fbc10b494292..eb0854ef9757 100644
> --- a/kernel/cgroup/cpuset.c
> +++ b/kernel/cgroup/cpuset.c
> @@ -193,6 +193,12 @@ struct cpuset {
>  	int use_parent_ecpus;
>  	int child_ecpus_count;
>  
> +	/*
> +	 * number of SCHED_DEADLINE tasks attached to this cpuset, so that we
> +	 * know when to rebuild associated root domain bandwidth information.
> +	 */
> +	int nr_deadline_tasks;
> +
>  	/* Invalid partition error code, not lock protected */
>  	enum prs_errcode prs_err;
>  
> @@ -245,6 +251,20 @@ static inline struct cpuset *parent_cs(struct cpuset *cs)
>  	return css_cs(cs->css.parent);
>  }
>  
> +void inc_dl_tasks_cs(struct task_struct *p)
> +{
> +	struct cpuset *cs = task_cs(p);
> +
> +	cs->nr_deadline_tasks++;
> +}
> +
> +void dec_dl_tasks_cs(struct task_struct *p)
> +{
> +	struct cpuset *cs = task_cs(p);
> +
> +	cs->nr_deadline_tasks--;
> +}
> +
>  /* bits in struct cpuset flags field */
>  typedef enum {
>  	CS_ONLINE,
> @@ -2477,6 +2497,11 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
>  		ret = security_task_setscheduler(task);
>  		if (ret)
>  			goto out_unlock;
> +
> +		if (dl_task(task)) {
> +			cs->nr_deadline_tasks++;
> +			cpuset_attach_old_cs->nr_deadline_tasks--;
> +		}
>  	}
>  
>  	/*
> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index 4cc7e1ca066d..8f92f0f87383 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -16,6 +16,8 @@
>   *                    Fabio Checconi <fchecconi@gmail.com>
>   */
>  
> +#include <linux/cpuset.h>
> +
>  /*
>   * Default limits for DL period; on the top end we guard against small util
>   * tasks still getting ridiculously long effective runtimes, on the bottom end we
> @@ -2595,6 +2597,12 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
>  	if (task_on_rq_queued(p) && p->dl.dl_runtime)
>  		task_non_contending(p);
>  
> +	/*
> +	 * In case a task is setscheduled out from SCHED_DEADLINE we need to
> +	 * keep track of that on its cpuset (for correct bandwidth tracking).
> +	 */
> +	dec_dl_tasks_cs(p);
> +
>  	if (!task_on_rq_queued(p)) {
>  		/*
>  		 * Inactive timer is armed. However, p is leaving DEADLINE and
> @@ -2635,6 +2643,12 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
>  	if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
>  		put_task_struct(p);
>  
> +	/*
> +	 * In case a task is setscheduled to SCHED_DEADLINE we need to keep
> +	 * track of that on its cpuset (for correct bandwidth tracking).
> +	 */
> +	inc_dl_tasks_cs(p);
> +
>  	/* If p is not queued we will update its parameters at next wakeup. */
>  	if (!task_on_rq_queued(p)) {
>  		add_rq_bw(&p->dl, &rq->dl);
> -- 
> 2.39.2
>
  
Xia Fukun Oct. 9, 2023, 11:43 a.m. UTC | #2
On 2023/3/29 20:55, Juri Lelli wrote:

> To fix the problem keep track of the number of DEADLINE tasks belonging
> to each cpuset and then use this information (followup patch) to only
> perform the above iteration if DEADLINE tasks are actually present in
> the cpuset for which a corresponding root domain is being rebuilt.
>  
> diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
> index 935e8121b21e..ff27b2d2bf0b 100644

> @@ -6673,6 +6674,9 @@ void cgroup_exit(struct task_struct *tsk)
>  	list_add_tail(&tsk->cg_list, &cset->dying_tasks);
>  	cset->nr_tasks--;
>  
> +	if (dl_task(tsk))
> +		dec_dl_tasks_cs(tsk);
> +
>  	WARN_ON_ONCE(cgroup_task_frozen(tsk));
>  	if (unlikely(!(tsk->flags & PF_KTHREAD) &&
>  		     test_bit(CGRP_FREEZE, &task_dfl_cgroup(tsk)->flags)))


The cgroup_exit() function decrements nr_deadline_tasks by one.


> diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
> index fbc10b494292..eb0854ef9757 100644
> --- a/kernel/cgroup/cpuset.c
> +++ b/kernel/cgroup/cpuset.c
> @@ -193,6 +193,12 @@ struct cpuset {
> +	/*
> +	 * number of SCHED_DEADLINE tasks attached to this cpuset, so that we
> +	 * know when to rebuild associated root domain bandwidth information.
> +	 */
> +	int nr_deadline_tasks;
> +

> +void inc_dl_tasks_cs(struct task_struct *p)
> +{
> +	struct cpuset *cs = task_cs(p);
> +
> +	cs->nr_deadline_tasks++;
> +}
> +
> +void dec_dl_tasks_cs(struct task_struct *p)
> +{
> +	struct cpuset *cs = task_cs(p);
> +
> +	cs->nr_deadline_tasks--;
> +}
> +

> @@ -2477,6 +2497,11 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
>  		ret = security_task_setscheduler(task);
>  		if (ret)
>  			goto out_unlock;
> +
> +		if (dl_task(task)) {
> +			cs->nr_deadline_tasks++;
> +			cpuset_attach_old_cs->nr_deadline_tasks--;
> +		}
>  	}


The cpuset_can_attach() function increments nr_deadline_tasks by one.


>  	/*
> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index 4cc7e1ca066d..8f92f0f87383 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -16,6 +16,8 @@
>   *                    Fabio Checconi <fchecconi@gmail.com>
>   */
>  
> +#include <linux/cpuset.h>
> +
>  /*
>   * Default limits for DL period; on the top end we guard against small util
>   * tasks still getting ridiculously long effective runtimes, on the bottom end we
> @@ -2595,6 +2597,12 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
>  	if (task_on_rq_queued(p) && p->dl.dl_runtime)
>  		task_non_contending(p);
>  
> +	/*
> +	 * In case a task is setscheduled out from SCHED_DEADLINE we need to
> +	 * keep track of that on its cpuset (for correct bandwidth tracking).
> +	 */
> +	dec_dl_tasks_cs(p);
> +
>  	if (!task_on_rq_queued(p)) {
>  		/*
>  		 * Inactive timer is armed. However, p is leaving DEADLINE and
> @@ -2635,6 +2643,12 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
>  	if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
>  		put_task_struct(p);
>  
> +	/*
> +	 * In case a task is setscheduled to SCHED_DEADLINE we need to keep
> +	 * track of that on its cpuset (for correct bandwidth tracking).
> +	 */
> +	inc_dl_tasks_cs(p);
> +
>  	/* If p is not queued we will update its parameters at next wakeup. */
>  	if (!task_on_rq_queued(p)) {
>  		add_rq_bw(&p->dl, &rq->dl);


And both switched_from_dl() and switched_to_dl() change the value of
nr_deadline_tasks.

I suspect that modifying nr_deadline_tasks from these four paths can
cause data races.

And [PATCH 6/6] ("cgroup/cpuset: Iterate only if DEADLINE tasks are present")
adds the following check:

diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index f8ebec66da51..05c0a1255218 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -1092,6 +1092,9 @@ static void dl_update_tasks_root_domain(struct cpuset *cs)
 	struct css_task_iter it;
 	struct task_struct *task;

+	if (cs->nr_deadline_tasks == 0)
+		return;
+
 	css_task_iter_start(&cs->css, 0, &it);

 	while ((task = css_task_iter_next(&it)))
--


An inconsistent nr_deadline_tasks value could therefore lead to logic
errors here.

May I ask what the experts think of this data race concern?

If there really is a problem, is it necessary to use atomic operations
to avoid it?
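
As a plain userspace analogue of the concern (illustrative only, not kernel
code): two threads doing non-atomic increments on a shared int usually lose
updates, which is the class of problem being asked about here:

/* gcc -pthread -o race race.c */
#include <pthread.h>
#include <stdio.h>

/* volatile only keeps the increments in the loop; it does NOT fix the race */
static volatile int counter;		/* plain int, like nr_deadline_tasks */

static void *bump(void *arg)
{
	(void)arg;
	for (int i = 0; i < 1000000; i++)
		counter++;		/* non-atomic read-modify-write */
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, bump, NULL);
	pthread_create(&b, NULL, bump, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);

	/* typically prints less than 2000000: some increments were lost */
	printf("counter = %d\n", counter);
	return 0;
}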

Thank you all.
  
Waiman Long Oct. 9, 2023, 3:26 p.m. UTC | #3
On 10/9/23 07:43, Xia Fukun wrote:
> On 2023/3/29 20:55, Juri Lelli wrote:
>
>> To fix the problem keep track of the number of DEADLINE tasks belonging
>> to each cpuset and then use this information (followup patch) to only
>> perform the above iteration if DEADLINE tasks are actually present in
>> the cpuset for which a corresponding root domain is being rebuilt.
>>   
>> diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
>> index 935e8121b21e..ff27b2d2bf0b 100644
>> @@ -6673,6 +6674,9 @@ void cgroup_exit(struct task_struct *tsk)
>>   	list_add_tail(&tsk->cg_list, &cset->dying_tasks);
>>   	cset->nr_tasks--;
>>   
>> +	if (dl_task(tsk))
>> +		dec_dl_tasks_cs(tsk);
>> +
>>   	WARN_ON_ONCE(cgroup_task_frozen(tsk));
>>   	if (unlikely(!(tsk->flags & PF_KTHREAD) &&
>>   		     test_bit(CGRP_FREEZE, &task_dfl_cgroup(tsk)->flags)))
>
> The cgroup_exit() function decrements the value of the nr_deadline_tasks by one.
>
>
>> diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
>> index fbc10b494292..eb0854ef9757 100644
>> --- a/kernel/cgroup/cpuset.c
>> +++ b/kernel/cgroup/cpuset.c
>> @@ -193,6 +193,12 @@ struct cpuset {
>> +	/*
>> +	 * number of SCHED_DEADLINE tasks attached to this cpuset, so that we
>> +	 * know when to rebuild associated root domain bandwidth information.
>> +	 */
>> +	int nr_deadline_tasks;
>> +
>> +void inc_dl_tasks_cs(struct task_struct *p)
>> +{
>> +	struct cpuset *cs = task_cs(p);
>> +
>> +	cs->nr_deadline_tasks++;
>> +}
>> +
>> +void dec_dl_tasks_cs(struct task_struct *p)
>> +{
>> +	struct cpuset *cs = task_cs(p);
>> +
>> +	cs->nr_deadline_tasks--;
>> +}
>> +
>> @@ -2477,6 +2497,11 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
>>   		ret = security_task_setscheduler(task);
>>   		if (ret)
>>   			goto out_unlock;
>> +
>> +		if (dl_task(task)) {
>> +			cs->nr_deadline_tasks++;
>> +			cpuset_attach_old_cs->nr_deadline_tasks--;
>> +		}
>>   	}
>
> The cpuset_can_attach() function increments the value of the nr_deadline_tasks by one.
>
>
>>   	/*
>> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
>> index 4cc7e1ca066d..8f92f0f87383 100644
>> --- a/kernel/sched/deadline.c
>> +++ b/kernel/sched/deadline.c
>> @@ -16,6 +16,8 @@
>>    *                    Fabio Checconi <fchecconi@gmail.com>
>>    */
>>   
>> +#include <linux/cpuset.h>
>> +
>>   /*
>>    * Default limits for DL period; on the top end we guard against small util
>>    * tasks still getting ridiculously long effective runtimes, on the bottom end we
>> @@ -2595,6 +2597,12 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
>>   	if (task_on_rq_queued(p) && p->dl.dl_runtime)
>>   		task_non_contending(p);
>>   
>> +	/*
>> +	 * In case a task is setscheduled out from SCHED_DEADLINE we need to
>> +	 * keep track of that on its cpuset (for correct bandwidth tracking).
>> +	 */
>> +	dec_dl_tasks_cs(p);
>> +
>>   	if (!task_on_rq_queued(p)) {
>>   		/*
>>   		 * Inactive timer is armed. However, p is leaving DEADLINE and
>> @@ -2635,6 +2643,12 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
>>   	if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
>>   		put_task_struct(p);
>>   
>> +	/*
>> +	 * In case a task is setscheduled to SCHED_DEADLINE we need to keep
>> +	 * track of that on its cpuset (for correct bandwidth tracking).
>> +	 */
>> +	inc_dl_tasks_cs(p);
>> +
>>   	/* If p is not queued we will update its parameters at next wakeup. */
>>   	if (!task_on_rq_queued(p)) {
>>   		add_rq_bw(&p->dl, &rq->dl);
>
> And both switched_from_dl() and switched_to_dl() can change the value of
> nr_deadline_tasks.
>
> I suspect that changing the values of the nr_deadline_tasks in these
> 4 paths will cause data race problems.
>
> And this patch([PATCH 6/6] cgroup/cpuset: Iterate only if DEADLINE tasks are present)
> has the following judgment:
>
> diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
> index f8ebec66da51..05c0a1255218 100644
> --- a/kernel/cgroup/cpuset.c
> +++ b/kernel/cgroup/cpuset.c
> @@ -1092,6 +1092,9 @@ static void dl_update_tasks_root_domain(struct cpuset *cs)
>   	struct css_task_iter it;
>   	struct task_struct *task;
>
> +	if (cs->nr_deadline_tasks == 0)
> +		return;
> +
>   	css_task_iter_start(&cs->css, 0, &it);
>
>   	while ((task = css_task_iter_next(&it)))
> --
>
>
> The uncertainty of nr_deadline_tasks can lead to logical problems.
>
> May I ask what experts think of the Data Race problem?
>
> I would like to inquire if there is a problem and if so, is it
> necessary to use atomic operations to avoid it?

It does look like the value of nr_deadline_tasks can be subject to data
races, leading to an incorrect value. Changing it to atomic_t should
avoid that, at the expense of slightly higher overhead.

Cheers,
Longman
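
For reference, a minimal sketch of the atomic_t variant suggested above
(hypothetical, not part of this series; it uses the standard kernel
<linux/atomic.h> API, and the check added by patch 6/6 would then read the
counter with atomic_read()):

/* in struct cpuset (kernel/cgroup/cpuset.c), hypothetically: */
	atomic_t nr_deadline_tasks;

void inc_dl_tasks_cs(struct task_struct *p)
{
	atomic_inc(&task_cs(p)->nr_deadline_tasks);
}

void dec_dl_tasks_cs(struct task_struct *p)
{
	atomic_dec(&task_cs(p)->nr_deadline_tasks);
}

/* reader side in dl_update_tasks_root_domain() (patch 6/6): */
	if (atomic_read(&cs->nr_deadline_tasks) == 0)
		return;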
  

Patch

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 355f796c5f07..0348dba5680e 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -71,6 +71,8 @@  extern void cpuset_init_smp(void);
 extern void cpuset_force_rebuild(void);
 extern void cpuset_update_active_cpus(void);
 extern void cpuset_wait_for_hotplug(void);
+extern void inc_dl_tasks_cs(struct task_struct *task);
+extern void dec_dl_tasks_cs(struct task_struct *task);
 extern void cpuset_lock(void);
 extern void cpuset_unlock(void);
 extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
@@ -196,6 +198,8 @@  static inline void cpuset_update_active_cpus(void)
 
 static inline void cpuset_wait_for_hotplug(void) { }
 
+static inline void inc_dl_tasks_cs(struct task_struct *task) { }
+static inline void dec_dl_tasks_cs(struct task_struct *task) { }
 static inline void cpuset_lock(void) { }
 static inline void cpuset_unlock(void) { }
 
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 935e8121b21e..ff27b2d2bf0b 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -57,6 +57,7 @@ 
 #include <linux/file.h>
 #include <linux/fs_parser.h>
 #include <linux/sched/cputime.h>
+#include <linux/sched/deadline.h>
 #include <linux/psi.h>
 #include <net/sock.h>
 
@@ -6673,6 +6674,9 @@  void cgroup_exit(struct task_struct *tsk)
 	list_add_tail(&tsk->cg_list, &cset->dying_tasks);
 	cset->nr_tasks--;
 
+	if (dl_task(tsk))
+		dec_dl_tasks_cs(tsk);
+
 	WARN_ON_ONCE(cgroup_task_frozen(tsk));
 	if (unlikely(!(tsk->flags & PF_KTHREAD) &&
 		     test_bit(CGRP_FREEZE, &task_dfl_cgroup(tsk)->flags)))
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index fbc10b494292..eb0854ef9757 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -193,6 +193,12 @@  struct cpuset {
 	int use_parent_ecpus;
 	int child_ecpus_count;
 
+	/*
+	 * number of SCHED_DEADLINE tasks attached to this cpuset, so that we
+	 * know when to rebuild associated root domain bandwidth information.
+	 */
+	int nr_deadline_tasks;
+
 	/* Invalid partition error code, not lock protected */
 	enum prs_errcode prs_err;
 
@@ -245,6 +251,20 @@  static inline struct cpuset *parent_cs(struct cpuset *cs)
 	return css_cs(cs->css.parent);
 }
 
+void inc_dl_tasks_cs(struct task_struct *p)
+{
+	struct cpuset *cs = task_cs(p);
+
+	cs->nr_deadline_tasks++;
+}
+
+void dec_dl_tasks_cs(struct task_struct *p)
+{
+	struct cpuset *cs = task_cs(p);
+
+	cs->nr_deadline_tasks--;
+}
+
 /* bits in struct cpuset flags field */
 typedef enum {
 	CS_ONLINE,
@@ -2477,6 +2497,11 @@  static int cpuset_can_attach(struct cgroup_taskset *tset)
 		ret = security_task_setscheduler(task);
 		if (ret)
 			goto out_unlock;
+
+		if (dl_task(task)) {
+			cs->nr_deadline_tasks++;
+			cpuset_attach_old_cs->nr_deadline_tasks--;
+		}
 	}
 
 	/*
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 4cc7e1ca066d..8f92f0f87383 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -16,6 +16,8 @@ 
  *                    Fabio Checconi <fchecconi@gmail.com>
  */
 
+#include <linux/cpuset.h>
+
 /*
  * Default limits for DL period; on the top end we guard against small util
  * tasks still getting ridiculously long effective runtimes, on the bottom end we
@@ -2595,6 +2597,12 @@  static void switched_from_dl(struct rq *rq, struct task_struct *p)
 	if (task_on_rq_queued(p) && p->dl.dl_runtime)
 		task_non_contending(p);
 
+	/*
+	 * In case a task is setscheduled out from SCHED_DEADLINE we need to
+	 * keep track of that on its cpuset (for correct bandwidth tracking).
+	 */
+	dec_dl_tasks_cs(p);
+
 	if (!task_on_rq_queued(p)) {
 		/*
 		 * Inactive timer is armed. However, p is leaving DEADLINE and
@@ -2635,6 +2643,12 @@  static void switched_to_dl(struct rq *rq, struct task_struct *p)
 	if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
 		put_task_struct(p);
 
+	/*
+	 * In case a task is setscheduled to SCHED_DEADLINE we need to keep
+	 * track of that on its cpuset (for correct bandwidth tracking).
+	 */
+	inc_dl_tasks_cs(p);
+
 	/* If p is not queued we will update its parameters at next wakeup. */
 	if (!task_on_rq_queued(p)) {
 		add_rq_bw(&p->dl, &rq->dl);