[1/3] SCHEDULER: Add an interface for counting real utilization.
Commit Message
Since PELT takes the running time of a thread as its utilization, a
thread that is running but not actually processing any work (i.e.
idling) is still counted as fully utilized. Our goal is to count the
effective working time of the thread, so as to calculate its true
utilization.
Signed-off-by: Xiaobing Li <xiaobing.li@samsung.com>
---
include/linux/kernel.h | 7 ++++++-
include/linux/sched.h | 1 +
kernel/sched/cputime.c | 36 +++++++++++++++++++++++++++++++++++-
kernel/sched/pelt.c | 14 ++++++++++++++
4 files changed, 56 insertions(+), 2 deletions(-)
Comments
Hi Xiaobing,
kernel test robot noticed the following build warnings:
[auto build test WARNING on tip/sched/core]
[also build test WARNING on linus/master v6.6-rc3 next-20230928]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Xiaobing-Li/SCHEDULER-Add-an-interface-for-counting-real-utilization/20230928-103219
base: tip/sched/core
patch link: https://lore.kernel.org/r/20230928022228.15770-2-xiaobing.li%40samsung.com
patch subject: [PATCH 1/3] SCHEDULER: Add an interface for counting real utilization.
config: m68k-allyesconfig (https://download.01.org/0day-ci/archive/20230928/202309281554.CwGBQGhe-lkp@intel.com/config)
compiler: m68k-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20230928/202309281554.CwGBQGhe-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202309281554.CwGBQGhe-lkp@intel.com/
All warnings (new ones prefixed by >>):
In file included from kernel/sched/build_policy.c:52:
>> kernel/sched/cputime.c:482:6: warning: no previous prototype for 'get_sqthread_util' [-Wmissing-prototypes]
482 | void get_sqthread_util(struct task_struct *p)
| ^~~~~~~~~~~~~~~~~
kernel/sched/cputime.c: In function 'get_sqthread_util':
kernel/sched/cputime.c:484:56: error: 'struct kernel_cpustat' has no member named 'sq_util'
484 | struct task_struct **sqstat = kcpustat_this_cpu->sq_util;
| ^~
kernel/sched/cputime.c:486:29: error: 'MAX_SQ_NUM' undeclared (first use in this function)
486 | for (int i = 0; i < MAX_SQ_NUM; i++) {
| ^~~~~~~~~~
kernel/sched/cputime.c:486:29: note: each undeclared identifier is reported only once for each function it appears in
kernel/sched/cputime.c:495:31: error: 'struct kernel_cpustat' has no member named 'flag'
495 | if (!kcpustat_this_cpu->flag) {
| ^~
kernel/sched/cputime.c:497:42: error: 'struct kernel_cpustat' has no member named 'sq_util'
497 | kcpustat_this_cpu->sq_util[j] = NULL;
| ^~
kernel/sched/cputime.c:498:34: error: 'struct kernel_cpustat' has no member named 'flag'
498 | kcpustat_this_cpu->flag = true;
| ^~
vim +/get_sqthread_util +482 kernel/sched/cputime.c
481
> 482 void get_sqthread_util(struct task_struct *p)
483 {
484 struct task_struct **sqstat = kcpustat_this_cpu->sq_util;
485
486 for (int i = 0; i < MAX_SQ_NUM; i++) {
487 if (sqstat[i] && (task_cpu(sqstat[i]) != task_cpu(p)
488 || sqstat[i]->__state == TASK_DEAD))
489 sqstat[i] = NULL;
490 }
491
492 if (strncmp(p->comm, "iou-sqp", 7))
493 return;
494
495 if (!kcpustat_this_cpu->flag) {
496 for (int j = 0; j < MAX_SQ_NUM; j++)
497 kcpustat_this_cpu->sq_util[j] = NULL;
498 kcpustat_this_cpu->flag = true;
499 }
500 int index = MAX_SQ_NUM;
501 bool flag = true;
502
503 for (int i = 0; i < MAX_SQ_NUM; i++) {
504 if (sqstat[i] == p)
505 flag = false;
506 if (!sqstat[i] || task_cpu(sqstat[i]) != task_cpu(p)) {
507 sqstat[i] = NULL;
508 if (i < index)
509 index = i;
510 }
511 }
512 if (flag && index < MAX_SQ_NUM)
513 sqstat[index] = p;
514 }
515
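For context, the build errors above occur because this first patch uses
kcpustat members and a MAX_SQ_NUM constant that it never declares (they
presumably arrive in a later patch of the series). A minimal sketch of
the declarations the function body appears to assume; the member names
are taken from the error output, but the types and the value of
MAX_SQ_NUM are guesses, not confirmed by the patch:

	/* Hypothetical additions to struct kernel_cpustat in
	 * include/linux/kernel_stat.h; MAX_SQ_NUM's value is a guess. */
	#define MAX_SQ_NUM 8

	struct kernel_cpustat {
		u64 cpustat[NR_STATS];
		/* per-CPU slots tracking io_uring SQ poll threads */
		struct task_struct *sq_util[MAX_SQ_NUM];
		/* set once the sq_util[] slots have been initialized */
		bool flag;
	};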
On Thu, Sep 28, 2023 at 10:22:26AM +0800, Xiaobing Li wrote:
> + for (int i = 0; i < MAX_SQ_NUM; i++) {
> + if (sqstat[i] && (task_cpu(sqstat[i]) != task_cpu(p)
> + || sqstat[i]->__state == TASK_DEAD))
> + sqstat[i] = NULL;
> + }
This coding style is horrific, please don't ever use that again.
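For comparison, here is the same loop in more conventional kernel
style, with identical behavior (a sketch for illustration, not a
revision the author posted):

	for (int i = 0; i < MAX_SQ_NUM; i++) {
		if (!sqstat[i])
			continue;
		if (task_cpu(sqstat[i]) != task_cpu(p) ||
		    sqstat[i]->__state == TASK_DEAD)
			sqstat[i] = NULL;
	}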
On Thu, Sep 28 2023 at 10:22, Xiaobing Li wrote:
> Since PELT takes the running time of a thread as its utilization, a
> thread that is running but not actually processing any work (i.e.
> idling) is still counted as fully utilized. Our goal is to count the
> effective working time of the thread, so as to calculate its true
> utilization.
Sorry. I can't figure out from the above what you are trying to achieve
and which problem this is actually solving.
> +void get_sqthread_util(struct task_struct *p)
> +{
> + struct task_struct **sqstat = kcpustat_this_cpu->sq_util;
> +
> + for (int i = 0; i < MAX_SQ_NUM; i++) {
> + if (sqstat[i] && (task_cpu(sqstat[i]) != task_cpu(p)
> + || sqstat[i]->__state == TASK_DEAD))
> + sqstat[i] = NULL;
> + }
This is unreadable.
> +
> + if (strncmp(p->comm, "iou-sqp", 7))
> + return;
You really want to do hard coded string parsing on every invocation of
this function, which happens at least once per tick, right?
What's so special about iou-sqp?
Nothing at all. It might be special for your particular workload, but it's
completely irrelevant to everyone else.
We are not adding random statistics to the hotpath just because.
Please come back once you have a proper justification for imposing this
on everyone who is not using io_uring at all.
Thanks
tglx
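A conventional way to avoid string matching in the tick path is to mark
the thread once at creation, so the hot path reduces to a flag test. A
purely hypothetical sketch; the flag name, bit value, and call site
below are invented for illustration and are not part of this patch or
of mainline:

	/* Hypothetical per-task flag, set once when io_uring creates the
	 * SQ poll thread (e.g. current->flags |= PF_SQPOLL;). The bit
	 * value is illustrative and has not been checked for collisions
	 * with existing PF_* flags. */
	#define PF_SQPOLL	0x01000000

	static inline bool task_is_sqpoll(struct task_struct *p)
	{
		return p->flags & PF_SQPOLL;
	}

	/* The tick path could then test task_is_sqpoll(p) instead of
	 * calling strncmp(p->comm, "iou-sqp", 7) on every tick. */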
Patch
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -37,7 +37,8 @@
#include <uapi/linux/kernel.h>
#define STACK_MAGIC 0xdeadbeef
-
+struct cfs_rq;
+struct sched_entity;
/**
* REPEAT_BYTE - repeat the value @x multiple times as an unsigned long value
* @x: value to repeat
@@ -103,6 +104,10 @@ extern int __cond_resched(void);
#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
+extern void __update_sq_avg_block(u64 now, struct sched_entity *se);
+
+extern void __update_sq_avg(u64 now, struct sched_entity *se);
+
extern int __cond_resched(void);
DECLARE_STATIC_CALL(might_resched, __cond_resched);
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -583,6 +583,7 @@ struct sched_entity {
* collide with read-mostly values above.
*/
struct sched_avg avg;
+ struct sched_avg sq_avg;
#endif
};
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -479,6 +479,40 @@ void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE: */
+void get_sqthread_util(struct task_struct *p)
+{
+ struct task_struct **sqstat = kcpustat_this_cpu->sq_util;
+
+ for (int i = 0; i < MAX_SQ_NUM; i++) {
+ if (sqstat[i] && (task_cpu(sqstat[i]) != task_cpu(p)
+ || sqstat[i]->__state == TASK_DEAD))
+ sqstat[i] = NULL;
+ }
+
+ if (strncmp(p->comm, "iou-sqp", 7))
+ return;
+
+ if (!kcpustat_this_cpu->flag) {
+ for (int j = 0; j < MAX_SQ_NUM; j++)
+ kcpustat_this_cpu->sq_util[j] = NULL;
+ kcpustat_this_cpu->flag = true;
+ }
+ int index = MAX_SQ_NUM;
+ bool flag = true;
+
+ for (int i = 0; i < MAX_SQ_NUM; i++) {
+ if (sqstat[i] == p)
+ flag = false;
+ if (!sqstat[i] || task_cpu(sqstat[i]) != task_cpu(p)) {
+ sqstat[i] = NULL;
+ if (i < index)
+ index = i;
+ }
+ }
+ if (flag && index < MAX_SQ_NUM)
+ sqstat[index] = p;
+}
+
/*
* Account a single tick of CPU time.
* @p: the process that the CPU time gets accounted to
@@ -487,7 +521,7 @@ void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
void account_process_tick(struct task_struct *p, int user_tick)
{
u64 cputime, steal;
-
+ get_sqthread_util(p);
if (vtime_accounting_enabled_this_cpu())
return;
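The robot's -Wmissing-prototypes warning stems from get_sqthread_util()
being defined with external linkage but no prior declaration. A minimal
fix would be to declare it in a header that cputime.c includes; the
header chosen below is an assumption:

	/* e.g. in include/linux/kernel_stat.h */
	extern void get_sqthread_util(struct task_struct *p);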
--- a/kernel/sched/pelt.c
+++ b/kernel/sched/pelt.c
@@ -266,6 +266,20 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load)
WRITE_ONCE(sa->util_avg, sa->util_sum / divider);
}
+void __update_sq_avg_block(u64 now, struct sched_entity *se)
+{
+ if (___update_load_sum(now, &se->sq_avg, 0, 0, 0))
+ ___update_load_avg(&se->sq_avg, se_weight(se));
+}
+
+void __update_sq_avg(u64 now, struct sched_entity *se)
+{
+ struct cfs_rq *qcfs_rq = cfs_rq_of(se);
+
+ if (___update_load_sum(now, &se->sq_avg, !!se->on_rq, se_runnable(se), qcfs_rq->curr == se))
+ ___update_load_avg(&se->sq_avg, se_weight(se));
+}
+
/*
* sched_entity:
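The two helpers mirror the existing ___update_load_sum() /
___update_load_avg() pattern used for se->avg: __update_sq_avg()
accrues the load/runnable/running state into se->sq_avg, while
__update_sq_avg_block() only lets it decay. The call sites are
presumably added by later patches in the series; a hedged illustration
of the intended split, with an invented caller:

	/* Hypothetical caller, not part of this patch: decay sq_avg while
	 * the SQ thread merely busy-polls, accrue it while it performs
	 * real work, so sq_avg tracks effective rather than raw running
	 * time. */
	static void sq_update(u64 now, struct sched_entity *se, bool doing_work)
	{
		if (doing_work)
			__update_sq_avg(now, se);
		else
			__update_sq_avg_block(now, se);
	}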
*