[1/3] SCHEDULER: Add an interface for counting real utilization.

Message ID 20230928022228.15770-2-xiaobing.li@samsung.com
State: New
Series: Sq thread real utilization statistics.

Commit Message

Xiaobing Li Sept. 28, 2023, 2:22 a.m. UTC
  PELT takes the running time of a thread as its utilization, but some
threads, although running, are not actually processing any transactions
and are effectively idling. Our goal is to count the effective working
time of such threads, so as to calculate their true utilization.

Signed-off-by: Xiaobing Li <xiaobing.li@samsung.com>
---
 include/linux/kernel.h |  7 ++++++-
 include/linux/sched.h  |  1 +
 kernel/sched/cputime.c | 36 +++++++++++++++++++++++++++++++++++-
 kernel/sched/pelt.c    | 14 ++++++++++++++
 4 files changed, 56 insertions(+), 2 deletions(-)
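
For context, here is a minimal user-space sketch of the idea behind the series (the struct and helper below are hypothetical illustrations, not code from the patch): plain PELT would report a busy-polling SQPOLL thread as close to fully utilized, whereas the "real" utilization being proposed counts only the time the thread actually spends processing requests.

#include <stdio.h>

/* Hypothetical sample of one SQPOLL thread's accounting. */
struct sq_sample {
	unsigned long long running_ns;	/* wall time the thread was on-CPU */
	unsigned long long working_ns;	/* time it actually processed SQEs */
};

/* Real utilization in percent: effective work over on-CPU time. */
static unsigned int sq_real_util(const struct sq_sample *s)
{
	if (!s->running_ns)
		return 0;
	return (unsigned int)(s->working_ns * 100 / s->running_ns);
}

int main(void)
{
	/* The thread ran for 1s but only handled requests for 150ms. */
	struct sq_sample s = {
		.running_ns = 1000000000ULL,
		.working_ns = 150000000ULL,
	};

	printf("PELT-style utilization: ~100%%\n");
	printf("real utilization: %u%%\n", sq_real_util(&s));
	return 0;
}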
  

Comments

kernel test robot Sept. 28, 2023, 7:52 a.m. UTC | #1
Hi Xiaobing,

kernel test robot noticed the following build warnings:

[auto build test WARNING on tip/sched/core]
[also build test WARNING on linus/master v6.6-rc3 next-20230928]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Xiaobing-Li/SCHEDULER-Add-an-interface-for-counting-real-utilization/20230928-103219
base:   tip/sched/core
patch link:    https://lore.kernel.org/r/20230928022228.15770-2-xiaobing.li%40samsung.com
patch subject: [PATCH 1/3] SCHEDULER: Add an interface for counting real utilization.
config: m68k-allyesconfig (https://download.01.org/0day-ci/archive/20230928/202309281554.CwGBQGhe-lkp@intel.com/config)
compiler: m68k-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20230928/202309281554.CwGBQGhe-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202309281554.CwGBQGhe-lkp@intel.com/

All warnings (new ones prefixed by >>):

   In file included from kernel/sched/build_policy.c:52:
>> kernel/sched/cputime.c:482:6: warning: no previous prototype for 'get_sqthread_util' [-Wmissing-prototypes]
     482 | void get_sqthread_util(struct task_struct *p)
         |      ^~~~~~~~~~~~~~~~~
   kernel/sched/cputime.c: In function 'get_sqthread_util':
   kernel/sched/cputime.c:484:56: error: 'struct kernel_cpustat' has no member named 'sq_util'
     484 |         struct task_struct **sqstat = kcpustat_this_cpu->sq_util;
         |                                                        ^~
   kernel/sched/cputime.c:486:29: error: 'MAX_SQ_NUM' undeclared (first use in this function)
     486 |         for (int i = 0; i < MAX_SQ_NUM; i++) {
         |                             ^~~~~~~~~~
   kernel/sched/cputime.c:486:29: note: each undeclared identifier is reported only once for each function it appears in
   kernel/sched/cputime.c:495:31: error: 'struct kernel_cpustat' has no member named 'flag'
     495 |         if (!kcpustat_this_cpu->flag) {
         |                               ^~
   kernel/sched/cputime.c:497:42: error: 'struct kernel_cpustat' has no member named 'sq_util'
     497 |                         kcpustat_this_cpu->sq_util[j] = NULL;
         |                                          ^~
   kernel/sched/cputime.c:498:34: error: 'struct kernel_cpustat' has no member named 'flag'
     498 |                 kcpustat_this_cpu->flag = true;
         |                                  ^~


vim +/get_sqthread_util +482 kernel/sched/cputime.c

   481	
 > 482	void get_sqthread_util(struct task_struct *p)
   483	{
   484		struct task_struct **sqstat = kcpustat_this_cpu->sq_util;
   485	
   486		for (int i = 0; i < MAX_SQ_NUM; i++) {
   487			if (sqstat[i] && (task_cpu(sqstat[i]) != task_cpu(p)
   488			|| sqstat[i]->__state == TASK_DEAD))
   489				sqstat[i] = NULL;
   490		}
   491	
   492		if (strncmp(p->comm, "iou-sqp", 7))
   493			return;
   494	
   495		if (!kcpustat_this_cpu->flag) {
   496			for (int j = 0; j < MAX_SQ_NUM; j++)
   497				kcpustat_this_cpu->sq_util[j] = NULL;
   498			kcpustat_this_cpu->flag = true;
   499		}
   500		int index = MAX_SQ_NUM;
   501		bool flag = true;
   502	
   503		for (int i = 0; i < MAX_SQ_NUM; i++) {
   504			if (sqstat[i] == p)
   505				flag = false;
   506			if (!sqstat[i] || task_cpu(sqstat[i]) != task_cpu(p)) {
   507				sqstat[i] = NULL;
   508				if (i < index)
   509					index = i;
   510			}
   511		}
   512		if (flag && index < MAX_SQ_NUM)
   513			sqstat[index] = p;
   514	}
   515
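
The errors show that this patch references a constant and two struct members that it never defines. A rough reconstruction of what the code appears to expect, based only on the error output (the real definitions, if any, presumably live in another patch of the series, so treat the names and sizes below as assumptions):

/* Hypothetical additions to struct kernel_cpustat implied by the errors. */
#define MAX_SQ_NUM 8	/* assumed upper bound on tracked SQPOLL threads */

struct kernel_cpustat {
	u64 cpustat[NR_STATS];				/* existing field */
	struct task_struct *sq_util[MAX_SQ_NUM];	/* per-CPU SQPOLL task slots */
	bool flag;					/* one-shot init marker */
};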
  
Peter Zijlstra Sept. 28, 2023, 8:02 a.m. UTC | #2
On Thu, Sep 28, 2023 at 10:22:26AM +0800, Xiaobing Li wrote:
> +	for (int i = 0; i < MAX_SQ_NUM; i++) {
> +		if (sqstat[i] && (task_cpu(sqstat[i]) != task_cpu(p)
> +		|| sqstat[i]->__state == TASK_DEAD))
> +			sqstat[i] = NULL;
> +	}

This coding style is horrific, please don't ever use that again.
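
For reference, the same loop reformatted in the usual kernel style being asked for (logic unchanged; this is an editorial sketch, not code from a later revision):

	for (int i = 0; i < MAX_SQ_NUM; i++) {
		if (sqstat[i] &&
		    (task_cpu(sqstat[i]) != task_cpu(p) ||
		     sqstat[i]->__state == TASK_DEAD))
			sqstat[i] = NULL;
	}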
  
Thomas Gleixner Sept. 29, 2023, 12:31 a.m. UTC | #3
On Thu, Sep 28 2023 at 10:22, Xiaobing Li wrote:
> PELT takes the running time of a thread as its utilization, but some
> threads, although running, are not actually processing any transactions
> and are effectively idling. Our goal is to count the effective working
> time of such threads, so as to calculate their true utilization.

Sorry. I can't figure out from the above what you are trying to achieve
and which problem this is actually solving.

> +void get_sqthread_util(struct task_struct *p)
> +{
> +	struct task_struct **sqstat = kcpustat_this_cpu->sq_util;
> +
> +	for (int i = 0; i < MAX_SQ_NUM; i++) {
> +		if (sqstat[i] && (task_cpu(sqstat[i]) != task_cpu(p)
> +		|| sqstat[i]->__state == TASK_DEAD))
> +			sqstat[i] = NULL;
> +	}

This is unreadable.

> +
> +	if (strncmp(p->comm, "iou-sqp", 7))
> +		return;

You really want to do hard coded string parsing on every invocation of
this function, which happens at least once per tick, right?

What's so special about iou-sqp?

Nothing at all. It might be special for your particular workload, but it's
completely irrelevant to everyone else.

We are not adding random statistics to the hotpath just because.

Please come back once you have a proper justification for imposing this
on everyone who is not using io_uring at all.

Thanks

        tglx
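
Background for the name check being questioned: io_uring names its SQPOLL kernel threads "iou-sqp-<pid>", so strncmp(p->comm, "iou-sqp", 7) matches exactly those threads. A small user-space illustration of the check (not from the patch):

#include <stdio.h>
#include <string.h>

/* Mirrors the check in the patch: match only SQPOLL thread names. */
static int is_sqpoll_name(const char *comm)
{
	return strncmp(comm, "iou-sqp", 7) == 0;
}

int main(void)
{
	printf("%d\n", is_sqpoll_name("iou-sqp-1234"));	/* 1 */
	printf("%d\n", is_sqpoll_name("kworker/0:1"));	/* 0 */
	return 0;
}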
  

Patch

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index cee8fe87e9f4..c1557fa9cbbe 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -37,7 +37,8 @@ 
 #include <uapi/linux/kernel.h>
 
 #define STACK_MAGIC	0xdeadbeef
-
+struct cfs_rq;
+struct sched_entity;
 /**
  * REPEAT_BYTE - repeat the value @x multiple times as an unsigned long value
  * @x: value to repeat
@@ -103,6 +104,10 @@  extern int __cond_resched(void);
 
 #elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
 
+extern void __update_sq_avg_block(u64 now, struct sched_entity *se);
+
+extern void __update_sq_avg(u64 now, struct sched_entity *se);
+
 extern int __cond_resched(void);
 
 DECLARE_STATIC_CALL(might_resched, __cond_resched);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 77f01ac385f7..403ccb456c9a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -583,6 +583,7 @@  struct sched_entity {
 	 * collide with read-mostly values above.
 	 */
 	struct sched_avg		avg;
+	struct sched_avg		sq_avg;
 #endif
 };
 
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index af7952f12e6c..824203293fd9 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -479,6 +479,40 @@  void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
 
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE: */
 
+void get_sqthread_util(struct task_struct *p)
+{
+	struct task_struct **sqstat = kcpustat_this_cpu->sq_util;
+
+	for (int i = 0; i < MAX_SQ_NUM; i++) {
+		if (sqstat[i] && (task_cpu(sqstat[i]) != task_cpu(p)
+		|| sqstat[i]->__state == TASK_DEAD))
+			sqstat[i] = NULL;
+	}
+
+	if (strncmp(p->comm, "iou-sqp", 7))
+		return;
+
+	if (!kcpustat_this_cpu->flag) {
+		for (int j = 0; j < MAX_SQ_NUM; j++)
+			kcpustat_this_cpu->sq_util[j] = NULL;
+		kcpustat_this_cpu->flag = true;
+	}
+	int index = MAX_SQ_NUM;
+	bool flag = true;
+
+	for (int i = 0; i < MAX_SQ_NUM; i++) {
+		if (sqstat[i] == p)
+			flag = false;
+		if (!sqstat[i] || task_cpu(sqstat[i]) != task_cpu(p)) {
+			sqstat[i] = NULL;
+			if (i < index)
+				index = i;
+		}
+	}
+	if (flag && index < MAX_SQ_NUM)
+		sqstat[index] = p;
+}
+
 /*
  * Account a single tick of CPU time.
  * @p: the process that the CPU time gets accounted to
@@ -487,7 +521,7 @@  void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
 void account_process_tick(struct task_struct *p, int user_tick)
 {
 	u64 cputime, steal;
-
+	get_sqthread_util(p);
 	if (vtime_accounting_enabled_this_cpu())
 		return;
 
diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
index 0f310768260c..945efe80e08c 100644
--- a/kernel/sched/pelt.c
+++ b/kernel/sched/pelt.c
@@ -266,6 +266,20 @@  ___update_load_avg(struct sched_avg *sa, unsigned long load)
 	WRITE_ONCE(sa->util_avg, sa->util_sum / divider);
 }
 
+void __update_sq_avg_block(u64 now, struct sched_entity *se)
+{
+	if (___update_load_sum(now, &se->sq_avg, 0, 0, 0))
+		___update_load_avg(&se->sq_avg, se_weight(se));
+}
+
+void __update_sq_avg(u64 now, struct sched_entity *se)
+{
+	struct cfs_rq *qcfs_rq = cfs_rq_of(se);
+
+	if (___update_load_sum(now, &se->sq_avg, !!se->on_rq, se_runnable(se), qcfs_rq->curr == se))
+		___update_load_avg(&se->sq_avg, se_weight(se));
+}
+
 /*
  * sched_entity:
  *
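
A hedged note on how the new sq_avg signal would likely be consumed: it parallels PELT's existing util_avg, so a reader of the signal (expected in later patches of the series; the helper name below is hypothetical) could look like:

/* Hypothetical consumer, mirroring how PELT's util_avg is normally read. */
static inline unsigned long se_sq_util(struct sched_entity *se)
{
	return READ_ONCE(se->sq_avg.util_avg);
}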