@@ -548,6 +548,7 @@ struct sched_entity {
/* For load-balancing: */
struct load_weight load;
struct rb_node run_node;
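+ /* Node in the per-cfs_rq latency rb-tree (cfs_rq->latency_timeline) */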
+ struct rb_node latency_node;
struct list_head group_node;
unsigned int on_rq;
@@ -4361,6 +4361,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
p->se.nr_migrations = 0;
p->se.vruntime = 0;
INIT_LIST_HEAD(&p->se.group_node);
+ RB_CLEAR_NODE(&p->se.latency_node);
#ifdef CONFIG_FAIR_GROUP_SCHED
p->se.cfs_rq = NULL;
@@ -665,7 +665,76 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
return __node_2_se(last);
}
+#endif
+
+/**************************************************************
+ * Scheduling class tree data structure manipulation methods:
+ * for latency
+ */
+
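+/*
+ * Entities are ordered by vruntime shifted by their latency_offset: an
+ * entity with a negative (latency sensitive) offset sorts before an entity
+ * with the same vruntime and a larger offset.
+ */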
+static inline bool latency_before(struct sched_entity *a,
+ struct sched_entity *b)
+{
+ return (s64)(a->vruntime + a->latency_offset - b->vruntime - b->latency_offset) < 0;
+}
+
+#define __latency_node_2_se(node) \
+ rb_entry((node), struct sched_entity, latency_node)
+
+static inline bool __latency_less(struct rb_node *a, const struct rb_node *b)
+{
+ return latency_before(__latency_node_2_se(a), __latency_node_2_se(b));
+}
+
+/*
+ * Enqueue an entity into the latency rb-tree:
+ */
+static void __enqueue_latency(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+{
+ /* Only a latency sensitive entity can be added to the latency rb-tree */
+ if (se->latency_offset >= 0)
+ return;
+
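+ /* Already queued in the latency rb-tree: nothing to do */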
+ if (!RB_EMPTY_NODE(&se->latency_node))
+ return;
+
+ /*
+ * An execution time shorter than sysctl_sched_min_granularity means that
+ * the entity has been preempted by a higher sched class or by an entity
+ * with a stricter latency constraint.
+ * Put it back in the latency rb-tree so it gets a chance to run first
+ * during the next slice.
+ */
+ if (!(flags & ENQUEUE_WAKEUP)) {
+ u64 delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime;
+
+ if (delta_exec >= sysctl_sched_min_granularity)
+ return;
+ }
+
+ rb_add_cached(&se->latency_node, &cfs_rq->latency_timeline, __latency_less);
+}
+
+static void __dequeue_latency(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+ if (!RB_EMPTY_NODE(&se->latency_node)) {
+ rb_erase_cached(&se->latency_node, &cfs_rq->latency_timeline);
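+ /* Clear the node so RB_EMPTY_NODE() reports the entity as not queued */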
+ RB_CLEAR_NODE(&se->latency_node);
+ }
+}
+
+static struct sched_entity *__pick_first_latency(struct cfs_rq *cfs_rq)
+{
+ struct rb_node *left = rb_first_cached(&cfs_rq->latency_timeline);
+
+ if (!left)
+ return NULL;
+
+ return __latency_node_2_se(left);
+}
+
+#ifdef CONFIG_SCHED_DEBUG
/**************************************************************
* Scheduling class statistics methods:
*/
@@ -4739,8 +4808,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
check_schedstat_required();
update_stats_enqueue_fair(cfs_rq, se, flags);
check_spread(cfs_rq, se);
- if (!curr)
+ if (!curr) {
__enqueue_entity(cfs_rq, se);
+ __enqueue_latency(cfs_rq, se, flags);
+ }
se->on_rq = 1;
if (cfs_rq->nr_running == 1) {
@@ -4826,8 +4897,10 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
clear_buddies(cfs_rq, se);
- if (se != cfs_rq->curr)
+ if (se != cfs_rq->curr) {
__dequeue_entity(cfs_rq, se);
+ __dequeue_latency(cfs_rq, se);
+ }
se->on_rq = 0;
account_entity_dequeue(cfs_rq, se);
@@ -4916,6 +4989,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
*/
update_stats_wait_end_fair(cfs_rq, se);
__dequeue_entity(cfs_rq, se);
+ __dequeue_latency(cfs_rq, se);
update_load_avg(cfs_rq, se, UPDATE_TG);
}
@@ -4954,7 +5028,7 @@ static struct sched_entity *
pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
{
struct sched_entity *left = __pick_first_entity(cfs_rq);
- struct sched_entity *se;
+ struct sched_entity *latency, *se;
/*
* If curr is set we have to see if its left of the leftmost entity
@@ -4996,6 +5070,12 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
se = cfs_rq->last;
}
+ /*
+ * Check for a latency sensitive entity waiting to run and pick it instead,
+ * provided its vruntime does not exceed the vruntime of the default pick
+ * by more than a wakeup granularity, so the latency boost stays bounded
+ * by fairness.
+ */
+ latency = __pick_first_latency(cfs_rq);
+ if (latency && (latency != se) &&
+ wakeup_preempt_entity(latency, se) < 1)
+ se = latency;
+
return se;
}
@@ -5019,6 +5099,7 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
update_stats_wait_start_fair(cfs_rq, prev);
/* Put 'current' back into the tree. */
__enqueue_entity(cfs_rq, prev);
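+ /*
+ * Not a wakeup (flags = 0): a latency sensitive prev is only put back in
+ * the latency rb-tree if it was preempted before running for a full
+ * sysctl_sched_min_granularity.
+ */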
+ __enqueue_latency(cfs_rq, prev, 0);
/* in !on_rq case, update occurred at dequeue */
update_load_avg(cfs_rq, prev, 0);
}
@@ -12106,6 +12187,7 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first)
void init_cfs_rq(struct cfs_rq *cfs_rq)
{
cfs_rq->tasks_timeline = RB_ROOT_CACHED;
+ cfs_rq->latency_timeline = RB_ROOT_CACHED;
u64_u32_store(cfs_rq->min_vruntime, (u64)(-(1LL << 20)));
#ifdef CONFIG_SMP
raw_spin_lock_init(&cfs_rq->removed.lock);
@@ -12414,8 +12496,15 @@ int sched_group_set_latency(struct task_group *tg, s64 latency)
for_each_possible_cpu(i) {
struct sched_entity *se = tg->se[i];
+ struct rq *rq = cpu_rq(i);
+ struct rq_flags rf;
+
+ rq_lock_irqsave(rq, &rf);
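+
+ /*
+ * latency_offset is part of the latency rb-tree key, so the entity must
+ * be removed before the offset is updated; it will be requeued with the
+ * new value at the next enqueue.
+ */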
+ __dequeue_latency(se->cfs_rq, se);
WRITE_ONCE(se->latency_offset, latency);
+
+ rq_unlock_irqrestore(rq, &rf);
}
mutex_unlock(&shares_mutex);
@@ -575,6 +575,7 @@ struct cfs_rq {
#endif
struct rb_root_cached tasks_timeline;
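+ /* Latency sensitive entities waiting to run, keyed by vruntime + latency_offset */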
+ struct rb_root_cached latency_timeline;
/*
* 'curr' points to currently running entity on this cfs_rq.