[RFC,2/2] sched/fair: Repurpose cfs_rq_throttled()

Message ID 20231130161245.3894682-3-vschneid@redhat.com
State New
Series sched/fair: Delay throttling to kernel exit

Commit Message

Valentin Schneider Nov. 30, 2023, 4:12 p.m. UTC
cfs_rq->throttled is now never set: cfs_rqs are no longer fully throttled,
but instead stay in limbo while their tasks are gradually plucked out of them
as they exit the kernel.

Get rid of cfs_rq->throttled and repurpose cfs_rq_throttled() to report the
limbo state (cfs_rq->in_throttle_limbo) instead.

Signed-off-by: Valentin Schneider <vschneid@redhat.com>
---
 kernel/sched/fair.c  | 57 ++++++--------------------------------------
 kernel/sched/sched.h |  3 +--
 2 files changed, 8 insertions(+), 52 deletions(-)
  

Patch

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 27aee13e7ccd9..fd3a0c388fabd 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5497,7 +5497,7 @@  static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
 	if (likely(cfs_rq->runtime_remaining > 0))
 		return;
 
-	if (cfs_rq->throttled || cfs_rq->in_throttle_limbo)
+	if (cfs_rq->in_throttle_limbo)
 		return;
 	/*
 	 * if we're unable to extend our runtime we resched so that the active
@@ -5518,7 +5518,7 @@  void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
 
 static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
 {
-	return cfs_bandwidth_used() && cfs_rq->throttled;
+	return cfs_bandwidth_used() && cfs_rq->in_throttle_limbo;
 }
 
 /* check whether cfs_rq, or any parent, is throttled */
@@ -5848,10 +5848,6 @@  void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 
 		qcfs_rq->h_nr_running += task_delta;
 		qcfs_rq->idle_h_nr_running += idle_task_delta;
-
-		/* end evaluation on encountering a throttled cfs_rq */
-		if (cfs_rq_throttled(qcfs_rq))
-			goto unthrottle_throttle;
 	}
 
 	for_each_sched_entity(se) {
@@ -5862,10 +5858,6 @@  void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 
 		qcfs_rq->h_nr_running += task_delta;
 		qcfs_rq->idle_h_nr_running += idle_task_delta;
-
-		/* end evaluation on encountering a throttled cfs_rq */
-		if (cfs_rq_throttled(qcfs_rq))
-			goto unthrottle_throttle;
 	}
 
 	/* At this point se is NULL and we are at root level*/
@@ -5909,7 +5901,7 @@  static void __cfsb_csd_unthrottle(void *arg)
 				 throttled_csd_list) {
 		list_del_init(&cursor->throttled_csd_list);
 
-		if (cfs_rq_throttled(cursor) || cursor->in_throttle_limbo)
+		if (cfs_rq_throttled(cursor))
 			unthrottle_cfs_rq(cursor);
 	}
 
@@ -5949,7 +5941,7 @@  static void unthrottle_cfs_rq_async(struct cfs_rq *cfs_rq)
 {
 	lockdep_assert_rq_held(rq_of(cfs_rq));
 
-	if (SCHED_WARN_ON(!(cfs_rq_throttled(cfs_rq) || cfs_rq->in_throttle_limbo) ||
+	if (SCHED_WARN_ON(!cfs_rq_throttled(cfs_rq) ||
 	    cfs_rq->runtime_remaining <= 0))
 		return;
 
@@ -5982,7 +5974,7 @@  static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b)
 		 * waiting for tasks to exit the kernel. In this case we still
 		 * want to replenish.
 		 */
-		if (!cfs_rq_throttled(cfs_rq) && !cfs_rq->in_throttle_limbo)
+		if (!cfs_rq_throttled(cfs_rq))
 			goto next;
 
 		/* Already queued for async unthrottle */
@@ -6031,7 +6023,7 @@  static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b)
 
 		list_del_init(&cfs_rq->throttled_csd_list);
 
-		if (cfs_rq_throttled(cfs_rq) || cfs_rq->in_throttle_limbo)
+		if (cfs_rq_throttled(cfs_rq))
 			unthrottle_cfs_rq(cfs_rq);
 
 		rq_unlock_irqrestore(rq, &rf);
@@ -6230,10 +6222,6 @@  static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
 	if (!cfs_rq->runtime_enabled || cfs_rq->curr)
 		return;
 
-	/* ensure the group is not already throttled */
-	if (cfs_rq_throttled(cfs_rq))
-		return;
-
 	/* update runtime allocation */
 	account_cfs_rq_runtime(cfs_rq, 0);
 	if (cfs_rq->runtime_remaining <= 0)
@@ -6266,13 +6254,6 @@  static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 	if (likely(!cfs_rq->runtime_enabled || cfs_rq->runtime_remaining > 0))
 		return false;
 
-	/*
-	 * it's possible for a throttled entity to be forced into a running
-	 * state (e.g. set_curr_task), in this case we're finished.
-	 */
-	if (cfs_rq_throttled(cfs_rq))
-		return true;
-
 	return throttle_cfs_rq(cfs_rq);
 }
 
@@ -6705,10 +6686,6 @@  enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		if (cfs_rq_is_idle(cfs_rq))
 			idle_h_nr_running = 1;
 
-		/* end evaluation on encountering a throttled cfs_rq */
-		if (cfs_rq_throttled(cfs_rq))
-			goto enqueue_throttle;
-
 		flags = ENQUEUE_WAKEUP;
 	}
 
@@ -6724,10 +6701,6 @@  enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 		if (cfs_rq_is_idle(cfs_rq))
 			idle_h_nr_running = 1;
-
-		/* end evaluation on encountering a throttled cfs_rq */
-		if (cfs_rq_throttled(cfs_rq))
-			goto enqueue_throttle;
 	}
 
 	/* At this point se is NULL and we are at root level*/
@@ -6750,7 +6723,6 @@  enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	if (!task_new)
 		update_overutilized_status(rq);
 
-enqueue_throttle:
 	assert_list_leaf_cfs_rq(rq);
 
 	hrtick_update(rq);
@@ -6783,10 +6755,6 @@  static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		if (cfs_rq_is_idle(cfs_rq))
 			idle_h_nr_running = 1;
 
-		/* end evaluation on encountering a throttled cfs_rq */
-		if (cfs_rq_throttled(cfs_rq))
-			goto dequeue_throttle;
-
 		/* Don't dequeue parent if it has other entities besides us */
 		if (cfs_rq->load.weight) {
 			/* Avoid re-evaluating load for this entity: */
@@ -6815,10 +6783,6 @@  static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		if (cfs_rq_is_idle(cfs_rq))
 			idle_h_nr_running = 1;
 
-		/* end evaluation on encountering a throttled cfs_rq */
-		if (cfs_rq_throttled(cfs_rq))
-			goto dequeue_throttle;
-
 	}
 
 	/* At this point se is NULL and we are at root level*/
@@ -6828,7 +6792,6 @@  static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	if (unlikely(!was_sched_idle && sched_idle_rq(rq)))
 		rq->next_balance = jiffies;
 
-dequeue_throttle:
 	util_est_update(&rq->cfs, p, task_sleep);
 	hrtick_update(rq);
 }
@@ -9582,7 +9545,7 @@  static inline int check_misfit_status(struct rq *rq, struct sched_domain *sd)
  * Something like:
  *
  *	{ 0 1 2 3 } { 4 5 6 7 }
- *	        *     * * *
+ *		*     * * *
  *
  * If we were to balance group-wise we'd place two tasks in the first group and
  * two tasks in the second group. Clearly this is undesired as it will overload
@@ -12642,9 +12605,6 @@  static void propagate_entity_cfs_rq(struct sched_entity *se)
 {
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
 
-	if (cfs_rq_throttled(cfs_rq))
-		return;
-
 	if (!throttled_hierarchy(cfs_rq))
 		list_add_leaf_cfs_rq(cfs_rq);
 
@@ -12656,9 +12616,6 @@  static void propagate_entity_cfs_rq(struct sched_entity *se)
 
 		update_load_avg(cfs_rq, se, UPDATE_TG);
 
-		if (cfs_rq_throttled(cfs_rq))
-			break;
-
 		if (!throttled_hierarchy(cfs_rq))
 			list_add_leaf_cfs_rq(cfs_rq);
 	}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index be29154d93898..7f1afee52a776 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -631,9 +631,8 @@  struct cfs_rq {
 	u64			throttled_clock_pelt_time;
 	u64			throttled_clock_self;
 	u64			throttled_clock_self_time;
-	int			throttled;
-	int			throttle_count;
 	int                     in_throttle_limbo;
+	int			throttle_count;
 	/* Temp storage for updating the counts during unthrottling */
 	unsigned int            unthrottled_h_nr_running;
 	unsigned int            unthrottled_idle_h_nr_running;