[1/2] sched/core: switch struct rq->nr_iowait to an unsigned int
Commit Message
In 3 of the 4 spots where we modify rq->nr_iowait we already hold the
rq lock, and in the 4th one we can just grab it. This avoids an atomic
in the scheduler fast path if we're in iowait, with the tradeoff being
that we'll grab the rq lock for the case where a task is scheduled out
in iowait mode on one CPU, and scheduled in on another CPU.
This obviously leaves the reading side as potentially racy, but that
should be OK. iowait states change all of the time, and they can change
while being read or summed up as well.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
kernel/sched/core.c | 15 ++++++++++-----
kernel/sched/cputime.c | 2 +-
kernel/sched/sched.h | 2 +-
3 files changed, 12 insertions(+), 7 deletions(-)
Comments
On 2024-02-27 21:06, Jens Axboe wrote:
> In 3 of the 4 spots where we modify rq->nr_iowait we already hold the
> rq lock, and in the 4th one we can just grab it. This avoids an atomic
> in the scheduler fast path if we're in iowait, with the tradeoff being
> that we'll grab the rq lock for the case where a task is scheduled out
> in iowait mode on one CPU, and scheduled in on another CPU.
>
> This obviously leaves the reading side as potentially racy, but that
> should be OK. iowait states change all of the time, and can change while
> it's being read as well, or summed up.
>
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> ---
> kernel/sched/core.c | 15 ++++++++++-----
> kernel/sched/cputime.c | 2 +-
> kernel/sched/sched.h | 2 +-
> 3 files changed, 12 insertions(+), 7 deletions(-)
>
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 9116bcc90346..ecc6c26096e5 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3789,7 +3789,7 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
> #endif
> if (p->in_iowait) {
> delayacct_blkio_end(p);
> - atomic_dec(&task_rq(p)->nr_iowait);
> + task_rq(p)->nr_iowait--;
> }
>
> activate_task(rq, p, en_flags);
> @@ -4354,8 +4354,13 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
> cpu = select_task_rq(p, p->wake_cpu, wake_flags | WF_TTWU);
> if (task_cpu(p) != cpu) {
> if (p->in_iowait) {
> + struct rq *rq = task_rq(p);
> + struct rq_flags rf;
> +
> + rq_lock(rq, &rf);
> + task_rq(p)->nr_iowait--;
Could this use rq directly, or does it not matter?
> + rq_unlock(rq, &rf);
> delayacct_blkio_end(p);
> - atomic_dec(&task_rq(p)->nr_iowait);
> }
>
> wake_flags |= WF_MIGRATED;
> @@ -5463,7 +5468,7 @@ unsigned long long nr_context_switches(void)
>
> unsigned int nr_iowait_cpu(int cpu)
> {
> - return atomic_read(&cpu_rq(cpu)->nr_iowait);
> + return cpu_rq(cpu)->nr_iowait;
> }
>
> /*
> @@ -6681,7 +6686,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
> deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
>
> if (prev->in_iowait) {
> - atomic_inc(&rq->nr_iowait);
> + rq->nr_iowait++;
> delayacct_blkio_start();
> }
> }
> @@ -10029,7 +10034,7 @@ void __init sched_init(void)
> #endif
> #endif /* CONFIG_SMP */
> hrtick_rq_init(rq);
> - atomic_set(&rq->nr_iowait, 0);
> + rq->nr_iowait = 0;
I checked that both ttwu_do_activate() and __schedule() have the rq lock
held, but I couldn't find it for this. Is it under the assumption that
the rq is in a pre-init state (maybe because scheduler_running = 0?) so
no lock is needed?
>
> #ifdef CONFIG_SCHED_CORE
> rq->core = rq;
> diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
> index af7952f12e6c..b970b6c6151e 100644
> --- a/kernel/sched/cputime.c
> +++ b/kernel/sched/cputime.c
> @@ -224,7 +224,7 @@ void account_idle_time(u64 cputime)
> u64 *cpustat = kcpustat_this_cpu->cpustat;
> struct rq *rq = this_rq();
>
> - if (atomic_read(&rq->nr_iowait) > 0)
> + if (rq->nr_iowait)
> cpustat[CPUTIME_IOWAIT] += cputime;
> else
> cpustat[CPUTIME_IDLE] += cputime;
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 001fe047bd5d..a1222a4bdc7b 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -1049,7 +1049,7 @@ struct rq {
> u64 clock_idle_copy;
> #endif
>
> - atomic_t nr_iowait;
> + unsigned int nr_iowait;
>
> #ifdef CONFIG_SCHED_DEBUG
> u64 last_seen_need_resched_ns;
On 2/27/24 4:05 PM, David Wei wrote:
>> @@ -4354,8 +4354,13 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
>> cpu = select_task_rq(p, p->wake_cpu, wake_flags | WF_TTWU);
>> if (task_cpu(p) != cpu) {
>> if (p->in_iowait) {
>> + struct rq *rq = task_rq(p);
>> + struct rq_flags rf;
>> +
>> + rq_lock(rq, &rf);
>> + task_rq(p)->nr_iowait--;
>
> Could this use rq directly, or does it not matter?
It certainly could, I'll make that edit. Same thing, but may as well use
the variable as defined. Also makes it clear we're modifying the one
we've locked.
>> @@ -10029,7 +10034,7 @@ void __init sched_init(void)
>> #endif
>> #endif /* CONFIG_SMP */
>> hrtick_rq_init(rq);
>> - atomic_set(&rq->nr_iowait, 0);
>> + rq->nr_iowait = 0;
>
> I checked that both ttwu_do_activate() and __schedule() have the rq lock
> held, but I couldn't find it for this. Is it under the assumption that
> the rq is in a pre-init state (maybe because scheduler_running = 0?) so
> no lock is needed?
This is run at boot time (it's __init), so it's before anything is
running.
@@ -3789,7 +3789,7 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
#endif
if (p->in_iowait) {
delayacct_blkio_end(p);
- atomic_dec(&task_rq(p)->nr_iowait);
+ task_rq(p)->nr_iowait--;
}
activate_task(rq, p, en_flags);
@@ -4354,8 +4354,13 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
cpu = select_task_rq(p, p->wake_cpu, wake_flags | WF_TTWU);
if (task_cpu(p) != cpu) {
if (p->in_iowait) {
+ struct rq *rq = task_rq(p);
+ struct rq_flags rf;
+
+ rq_lock(rq, &rf);
+ task_rq(p)->nr_iowait--;
+ rq_unlock(rq, &rf);
delayacct_blkio_end(p);
- atomic_dec(&task_rq(p)->nr_iowait);
}
wake_flags |= WF_MIGRATED;
@@ -5463,7 +5468,7 @@ unsigned long long nr_context_switches(void)
unsigned int nr_iowait_cpu(int cpu)
{
- return atomic_read(&cpu_rq(cpu)->nr_iowait);
+ return cpu_rq(cpu)->nr_iowait;
}
/*
@@ -6681,7 +6686,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
if (prev->in_iowait) {
- atomic_inc(&rq->nr_iowait);
+ rq->nr_iowait++;
delayacct_blkio_start();
}
}
@@ -10029,7 +10034,7 @@ void __init sched_init(void)
#endif
#endif /* CONFIG_SMP */
hrtick_rq_init(rq);
- atomic_set(&rq->nr_iowait, 0);
+ rq->nr_iowait = 0;
#ifdef CONFIG_SCHED_CORE
rq->core = rq;
@@ -224,7 +224,7 @@ void account_idle_time(u64 cputime)
u64 *cpustat = kcpustat_this_cpu->cpustat;
struct rq *rq = this_rq();
- if (atomic_read(&rq->nr_iowait) > 0)
+ if (rq->nr_iowait)
cpustat[CPUTIME_IOWAIT] += cputime;
else
cpustat[CPUTIME_IDLE] += cputime;
@@ -1049,7 +1049,7 @@ struct rq {
u64 clock_idle_copy;
#endif
- atomic_t nr_iowait;
+ unsigned int nr_iowait;
#ifdef CONFIG_SCHED_DEBUG
u64 last_seen_need_resched_ns;