[1/2] sched: don't account throttle time for empty groups

Message ID 20230518013414.3053254-1-joshdon@google.com
State New
Headers
Series [1/2] sched: don't account throttle time for empty groups

Commit Message

Josh Don May 18, 2023, 1:34 a.m. UTC
  It is easy for a cfs_rq to become throttled even when it has no enqueued
entities (for example, if we have just put_prev()'d the last runnable
task of the cfs_rq, and the cfs_rq is out of quota).

Avoid accounting this time towards total throttle time, since it
otherwise falsely inflates the stats.

Signed-off-by: Josh Don <joshdon@google.com>
---
 kernel/sched/fair.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)
  

Comments

kernel test robot May 18, 2023, 8:20 a.m. UTC | #1
Hi Josh,

kernel test robot noticed the following build errors:

[auto build test ERROR on tip/sched/core]
[also build test ERROR on tip/master tip/auto-latest linus/master v6.4-rc2 next-20230518]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Josh-Don/sched-add-throttled-time-stat-for-throttled-children/20230518-095541
base:   tip/sched/core
patch link:    https://lore.kernel.org/r/20230518013414.3053254-1-joshdon%40google.com
patch subject: [PATCH 1/2] sched: don't account throttle time for empty groups
config: arm-randconfig-r025-20230517
compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project b0fb98227c90adf2536c9ad644a74d5e92961111)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install arm cross compiling tool for clang build
        # apt-get install binutils-arm-linux-gnueabi
        # https://github.com/intel-lab-lkp/linux/commit/9414859be598a05f1bb078f8bf83e132976384ea
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Josh-Don/sched-add-throttled-time-stat-for-throttled-children/20230518-095541
        git checkout 9414859be598a05f1bb078f8bf83e132976384ea
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash kernel/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202305181547.BxmBDKPa-lkp@intel.com/

All errors (new ones prefixed by >>):

>> kernel/sched/fair.c:4880:17: error: no member named 'throttled_clock' in 'struct cfs_rq'
                           if (!cfs_rq->throttled_clock)
                                ~~~~~~  ^
   kernel/sched/fair.c:4881:13: error: no member named 'throttled_clock' in 'struct cfs_rq'
                                   cfs_rq->throttled_clock = rq_clock(rq);
                                   ~~~~~~  ^
   kernel/sched/fair.c:6181:6: warning: no previous prototype for function 'init_cfs_bandwidth' [-Wmissing-prototypes]
   void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
        ^
   kernel/sched/fair.c:6181:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
   void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
   ^
   static 
   kernel/sched/fair.c:12617:6: warning: no previous prototype for function 'free_fair_sched_group' [-Wmissing-prototypes]
   void free_fair_sched_group(struct task_group *tg) { }
        ^
   kernel/sched/fair.c:12617:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
   void free_fair_sched_group(struct task_group *tg) { }
   ^
   static 
   kernel/sched/fair.c:12619:5: warning: no previous prototype for function 'alloc_fair_sched_group' [-Wmissing-prototypes]
   int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
       ^
   kernel/sched/fair.c:12619:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
   int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
   ^
   static 
   kernel/sched/fair.c:12624:6: warning: no previous prototype for function 'online_fair_sched_group' [-Wmissing-prototypes]
   void online_fair_sched_group(struct task_group *tg) { }
        ^
   kernel/sched/fair.c:12624:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
   void online_fair_sched_group(struct task_group *tg) { }
   ^
   static 
   kernel/sched/fair.c:12626:6: warning: no previous prototype for function 'unregister_fair_sched_group' [-Wmissing-prototypes]
   void unregister_fair_sched_group(struct task_group *tg) { }
        ^
   kernel/sched/fair.c:12626:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
   void unregister_fair_sched_group(struct task_group *tg) { }
   ^
   static 
   5 warnings and 2 errors generated.


vim +4880 kernel/sched/fair.c

  4792	
  4793	/*
  4794	 * MIGRATION
  4795	 *
  4796	 *	dequeue
  4797	 *	  update_curr()
  4798	 *	    update_min_vruntime()
  4799	 *	  vruntime -= min_vruntime
  4800	 *
  4801	 *	enqueue
  4802	 *	  update_curr()
  4803	 *	    update_min_vruntime()
  4804	 *	  vruntime += min_vruntime
  4805	 *
  4806	 * this way the vruntime transition between RQs is done when both
  4807	 * min_vruntime are up-to-date.
  4808	 *
  4809	 * WAKEUP (remote)
  4810	 *
  4811	 *	->migrate_task_rq_fair() (p->state == TASK_WAKING)
  4812	 *	  vruntime -= min_vruntime
  4813	 *
  4814	 *	enqueue
  4815	 *	  update_curr()
  4816	 *	    update_min_vruntime()
  4817	 *	  vruntime += min_vruntime
  4818	 *
  4819	 * this way we don't have the most up-to-date min_vruntime on the originating
  4820	 * CPU and an up-to-date min_vruntime on the destination CPU.
  4821	 */
  4822	
  4823	static void
  4824	enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
  4825	{
  4826		bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATED);
  4827		bool curr = cfs_rq->curr == se;
  4828		struct rq *rq = rq_of(cfs_rq);
  4829	
  4830		/*
  4831		 * If we're the current task, we must renormalise before calling
  4832		 * update_curr().
  4833		 */
  4834		if (renorm && curr)
  4835			se->vruntime += cfs_rq->min_vruntime;
  4836	
  4837		update_curr(cfs_rq);
  4838	
  4839		/*
  4840		 * Otherwise, renormalise after, such that we're placed at the current
  4841		 * moment in time, instead of some random moment in the past. Being
  4842		 * placed in the past could significantly boost this task to the
  4843		 * fairness detriment of existing tasks.
  4844		 */
  4845		if (renorm && !curr)
  4846			se->vruntime += cfs_rq->min_vruntime;
  4847	
  4848		/*
  4849		 * When enqueuing a sched_entity, we must:
  4850		 *   - Update loads to have both entity and cfs_rq synced with now.
  4851		 *   - For group_entity, update its runnable_weight to reflect the new
  4852		 *     h_nr_running of its group cfs_rq.
  4853		 *   - For group_entity, update its weight to reflect the new share of
  4854		 *     its group cfs_rq
  4855		 *   - Add its new weight to cfs_rq->load.weight
  4856		 */
  4857		update_load_avg(cfs_rq, se, UPDATE_TG | DO_ATTACH);
  4858		se_update_runnable(se);
  4859		update_cfs_group(se);
  4860		account_entity_enqueue(cfs_rq, se);
  4861	
  4862		if (flags & ENQUEUE_WAKEUP)
  4863			place_entity(cfs_rq, se, 0);
  4864		/* Entity has migrated, no longer consider this task hot */
  4865		if (flags & ENQUEUE_MIGRATED)
  4866			se->exec_start = 0;
  4867	
  4868		check_schedstat_required();
  4869		update_stats_enqueue_fair(cfs_rq, se, flags);
  4870		check_spread(cfs_rq, se);
  4871		if (!curr)
  4872			__enqueue_entity(cfs_rq, se);
  4873		se->on_rq = 1;
  4874	
  4875		if (cfs_rq->nr_running == 1) {
  4876			check_enqueue_throttle(cfs_rq);
  4877			if (!throttled_hierarchy(cfs_rq)) {
  4878				list_add_leaf_cfs_rq(cfs_rq);
  4879			} else {
> 4880				if (!cfs_rq->throttled_clock)
  4881					cfs_rq->throttled_clock = rq_clock(rq);
  4882			}
  4883		}
  4884	}
  4885
  

Patch

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f4b8b906d30a..85c2c0c3cab6 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4768,6 +4768,7 @@  enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
 	bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATED);
 	bool curr = cfs_rq->curr == se;
+	struct rq *rq = rq_of(cfs_rq);
 
 	/*
 	 * If we're the current task, we must renormalise before calling
@@ -4816,8 +4817,12 @@  enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 	if (cfs_rq->nr_running == 1) {
 		check_enqueue_throttle(cfs_rq);
-		if (!throttled_hierarchy(cfs_rq))
+		if (!throttled_hierarchy(cfs_rq)) {
 			list_add_leaf_cfs_rq(cfs_rq);
+		} else {
+			if (!cfs_rq->throttled_clock)
+				cfs_rq->throttled_clock = rq_clock(rq);
+		}
 	}
 }
 
@@ -5423,7 +5428,9 @@  static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 	 * throttled-list.  rq->lock protects completion.
 	 */
 	cfs_rq->throttled = 1;
-	cfs_rq->throttled_clock = rq_clock(rq);
+	SCHED_WARN_ON(cfs_rq->throttled_clock);
+	if (cfs_rq->nr_running)
+		cfs_rq->throttled_clock = rq_clock(rq);
 	return true;
 }
 
@@ -5441,7 +5448,10 @@  void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 	update_rq_clock(rq);
 
 	raw_spin_lock(&cfs_b->lock);
-	cfs_b->throttled_time += rq_clock(rq) - cfs_rq->throttled_clock;
+	if (cfs_rq->throttled_clock) {
+		cfs_b->throttled_time += rq_clock(rq) - cfs_rq->throttled_clock;
+		cfs_rq->throttled_clock = 0;
+	}
 	list_del_rcu(&cfs_rq->throttled_list);
 	raw_spin_unlock(&cfs_b->lock);