sched: fix throttle accounting with nested bandwidth limits
Commit Message
This fixes two issues:
- throttled_clock should only be set on the group that is actually
getting throttled
- self-throttled time should only be accounted on entry/exit to
throttled state when we have nested limits
Fixes: 88cb2868250c ("sched: add throttled time stat for throttled children")
Fixes: 3ab150d011da ("sched: don't account throttle time for empty groups")
Signed-off-by: Josh Don <joshdon@google.com>
---
kernel/sched/fair.c | 25 +++++++++++++------------
1 file changed, 13 insertions(+), 12 deletions(-)
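For context on the first issue: throttling a group walks its entire subtree, so every descendant cfs_rq ends up with a nonzero throttle_count even though only the ancestor actually ran out of quota. The two predicates below (quoted from kernel/sched/fair.c of this era, with comments added here) show the distinction the fix relies on:

static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
{
	/* True only for the cfs_rq that cfs_bandwidth actually throttled. */
	return cfs_bandwidth_used() && cfs_rq->throttled;
}

static inline int throttled_hierarchy(struct cfs_rq *cfs_rq)
{
	/* True for that cfs_rq and for every descendant beneath it. */
	return cfs_bandwidth_used() && !!cfs_rq->throttle_count;
}

With that distinction, the enqueue_entity() hunk below starts throttled_clock only on the actually-throttled group, while throttled_clock_self still runs for every nonempty group inside the throttled hierarchy.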
Comments
On Thu, Jun 15, 2023 at 01:12:52PM -0700, Josh Don wrote:
> This fixes two issues:
> - throttled_clock should only be set on the group that is actually
> getting throttled
> - self-throttled time should only be accounted on entry/exit to
> throttled state when we have nested limits
>
> Fixes: 88cb2868250c ("sched: add throttled time stat for throttled children")
> Fixes: 3ab150d011da ("sched: don't account throttle time for empty groups")
> Signed-off-by: Josh Don <joshdon@google.com>
Hurmph, those are not the sha1 I have in tip/sched/core.
Also, should I rebase and just pull those patches so we can try again?
On Fri, Jun 16, 2023 at 6:33 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Thu, Jun 15, 2023 at 01:12:52PM -0700, Josh Don wrote:
> > This fixes two issues:
> > - throttled_clock should only be set on the group that is actually
> > getting throttled
> > - self-throttled time should only be accounted on entry/exit to
> > throttled state when we have nested limits
> >
> > Fixes: 88cb2868250c ("sched: add throttled time stat for throttled children")
> > Fixes: 3ab150d011da ("sched: don't account throttle time for empty groups")
> > Signed-off-by: Josh Don <joshdon@google.com>
>
> Hurmph, those are not the sha1 I have in tip/sched/core.
>
> Also, should I rebase and just pull those patches so we can try again?
Oh whoops, I used the SHAs from your queue.git.
Yes, if it is still possible to intercept them then that would be
great. I'm OOO at the moment, but can respin those for you first thing
next week.
Thanks,
Josh
On Fri, Jun 16, 2023 at 07:47:24AM -0700, Josh Don wrote:
> On Fri, Jun 16, 2023 at 6:33 AM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > On Thu, Jun 15, 2023 at 01:12:52PM -0700, Josh Don wrote:
> > > This fixes two issues:
> > > - throttled_clock should only be set on the group that is actually
> > > getting throttled
> > > - self-throttled time should only be accounted on entry/exit to
> > > throttled state when we have nested limits
> > >
> > > Fixes: 88cb2868250c ("sched: add throttled time stat for throttled children")
> > > Fixes: 3ab150d011da ("sched: don't account throttle time for empty groups")
> > > Signed-off-by: Josh Don <joshdon@google.com>
> >
> > Hurmph, those are not the sha1 I have in tip/sched/core.
> >
> > Also, should I rebase and just pull those patches so we can try again?
>
> Oh whoops, I used the SHAs from your queue.git.
>
> Yes, if it is still possible to intercept them then that would be
> great. I'm OOO at the moment, but can respin those for you first thing
> next week.
There's only a few patches on top, I'll make it go away.
@@ -4787,6 +4787,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 }
 
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
+static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq);
 
 static inline bool cfs_bandwidth_used(void);
 
@@ -4879,7 +4880,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 #ifdef CONFIG_CFS_BANDWIDTH
 		struct rq *rq = rq_of(cfs_rq);
 
-		if (!cfs_rq->throttled_clock)
+		if (cfs_rq_throttled(cfs_rq) && !cfs_rq->throttled_clock)
 			cfs_rq->throttled_clock = rq_clock(rq);
 		if (!cfs_rq->throttled_clock_self)
 			cfs_rq->throttled_clock_self = rq_clock(rq);
@@ -5387,17 +5388,17 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
 		/* Add cfs_rq with load or one or more already running entities to the list */
 		if (!cfs_rq_is_decayed(cfs_rq))
 			list_add_leaf_cfs_rq(cfs_rq);
-	}
 
-	if (cfs_rq->throttled_clock_self) {
-		u64 delta = rq_clock(rq) - cfs_rq->throttled_clock_self;
+		if (cfs_rq->throttled_clock_self) {
+			u64 delta = rq_clock(rq) - cfs_rq->throttled_clock_self;
 
-		cfs_rq->throttled_clock_self = 0;
+			cfs_rq->throttled_clock_self = 0;
 
-		if (SCHED_WARN_ON((s64)delta < 0))
-			delta = 0;
+			if (SCHED_WARN_ON((s64)delta < 0))
+				delta = 0;
 
-		cfs_rq->throttled_clock_self_time += delta;
+			cfs_rq->throttled_clock_self_time += delta;
+		}
 	}
 
 	return 0;
@@ -5412,13 +5413,13 @@ static int tg_throttle_down(struct task_group *tg, void *data)
 	if (!cfs_rq->throttle_count) {
 		cfs_rq->throttled_clock_pelt = rq_clock_pelt(rq);
 		list_del_leaf_cfs_rq(cfs_rq);
+
+		SCHED_WARN_ON(cfs_rq->throttled_clock_self);
+		if (cfs_rq->nr_running)
+			cfs_rq->throttled_clock_self = rq_clock(rq);
 	}
 	cfs_rq->throttle_count++;
 
-	SCHED_WARN_ON(cfs_rq->throttled_clock_self);
-	if (cfs_rq->nr_running)
-		cfs_rq->throttled_clock_self = rq_clock(rq);
-
 	return 0;
 }
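To make the second issue concrete: with nested bandwidth limits a group can be throttled while an ancestor already has it throttled, so throttle_count can go 0 -> 1 -> 2 and back down, and self-throttled time must be accounted only on the 0 -> 1 and 1 -> 0 transitions. That is what moving the clock start/stop under the !throttle_count checks above achieves. Below is a standalone user-space sketch of just that transition logic; the types and names are toy stand-ins, not the kernel's (fake_clock plays the role of rq_clock(), and the real code additionally warns on negative deltas as the hunks show):

#include <stdint.h>
#include <stdio.h>

/* Toy stand-in for the kernel's cfs_rq; only the fields that matter here. */
struct toy_cfs_rq {
	int throttle_count;		/* nesting depth of throttling */
	int nr_running;
	uint64_t clock_self;		/* mirrors throttled_clock_self */
	uint64_t clock_self_time;	/* mirrors throttled_clock_self_time */
};

static uint64_t fake_clock;		/* stands in for rq_clock(rq) */

/* Mirrors the fixed tg_throttle_down(): start the clock only on 0 -> 1. */
static void toy_throttle_down(struct toy_cfs_rq *cfs_rq)
{
	if (!cfs_rq->throttle_count && cfs_rq->nr_running)
		cfs_rq->clock_self = fake_clock;
	cfs_rq->throttle_count++;
}

/* Mirrors the fixed tg_unthrottle_up(): account only on 1 -> 0. */
static void toy_unthrottle_up(struct toy_cfs_rq *cfs_rq)
{
	cfs_rq->throttle_count--;
	if (!cfs_rq->throttle_count && cfs_rq->clock_self) {
		cfs_rq->clock_self_time += fake_clock - cfs_rq->clock_self;
		cfs_rq->clock_self = 0;
	}
}

int main(void)
{
	struct toy_cfs_rq cfs_rq = { .nr_running = 1 };

	fake_clock = 5;
	toy_throttle_down(&cfs_rq);	/* outer throttle: clock starts at t=5 */
	fake_clock = 10;
	toy_throttle_down(&cfs_rq);	/* nested throttle: clock must not restart */
	fake_clock = 25;
	toy_unthrottle_up(&cfs_rq);	/* still nested: no accounting yet */
	fake_clock = 40;
	toy_unthrottle_up(&cfs_rq);	/* fully unthrottled: account 40 - 5 */

	printf("self-throttled time: %llu\n",
	       (unsigned long long)cfs_rq.clock_self_time);
	return 0;
}

Compiled and run, this prints a self-throttled time of 35: one continuous span from t=5 to t=40, with the nested throttle/unthrottle at t=10 and t=25 neither restarting the clock nor double-counting.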