[2/4] powercap: idle_inject: Add begin/end callbacks

Message ID 20221108030342.1127216-3-srinivas.pandruvada@linux.intel.com
State New
Headers
Series Per CPU idle injection |

Commit Message

srinivas pandruvada Nov. 8, 2022, 3:03 a.m. UTC
  The actual CPU Idle percent can be different than what can be observed
from the hardware. Since the objective for CPU Idle injection is for
thermal control, the idle percent observed by the hardware is more
relevant.

To account for hardware feedback the actual runtime/idle time should be
adjusted.

Add a capability to register a begin and end callback during call to
idle_inject_register(). If they are not NULL, then begin callback is
called before calling play_idle_precise() and end callback is called
after play_idle_precise().

If begin callback is present and returns non 0 value then
play_idle_precise() is not called as it means there is some over
compensation.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
---
 drivers/powercap/idle_inject.c    | 19 ++++++++++++++++++-
 drivers/thermal/cpuidle_cooling.c |  2 +-
 include/linux/idle_inject.h       |  4 +++-
 3 files changed, 22 insertions(+), 3 deletions(-)
  

Comments

Rafael J. Wysocki Nov. 9, 2022, 2:48 p.m. UTC | #1
On Tue, Nov 8, 2022 at 4:04 AM Srinivas Pandruvada
<srinivas.pandruvada@linux.intel.com> wrote:
>
> The actual CPU Idle percent can be different than what can be observed
> from the hardware.

Can you expand this a bit, please?  It is not clear what the "CPU idle
percent" and "observed from the hardware" phrases mean here.

> Since the objective for CPU Idle injection is for
> thermal control, the idle percent observed by the hardware is more
> relevant.
>
> To account for hardware feedback the actual runtime/idle time should be
> adjusted.
>
> Add a capability to register a begin and end callback during call to

I would call them "prepare" and "complete" without the "idle_inject_" prefix.

> idle_inject_register(). If they are not NULL, then begin callback is
> called before calling play_idle_precise() and end callback is called
> after play_idle_precise().
>
> If begin callback is present and returns non 0 value then
> play_idle_precise() is not called as it means there is some over
> compensation.

This behavior needs to be documented somewhere other than the patch changelog.

> Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
> ---
>  drivers/powercap/idle_inject.c    | 19 ++++++++++++++++++-
>  drivers/thermal/cpuidle_cooling.c |  2 +-
>  include/linux/idle_inject.h       |  4 +++-
>  3 files changed, 22 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c
> index e73885bd9065..14968b0ff133 100644
> --- a/drivers/powercap/idle_inject.c
> +++ b/drivers/powercap/idle_inject.c
> @@ -70,6 +70,8 @@ struct idle_inject_device {
>         unsigned int idle_duration_us;
>         unsigned int run_duration_us;
>         unsigned int latency_us;
> +       int (*idle_inject_begin)(unsigned int cpu);
> +       void (*idle_inject_end)(unsigned int cpu);

The comment above needs to be updated.  Also please see the remark
above regarding callback names.

>         unsigned long cpumask[];
>  };
>
> @@ -132,6 +134,7 @@ static void idle_inject_fn(unsigned int cpu)
>  {
>         struct idle_inject_device *ii_dev;
>         struct idle_inject_thread *iit;
> +       int ret;
>
>         ii_dev = per_cpu(idle_inject_device, cpu);
>         iit = per_cpu_ptr(&idle_inject_thread, cpu);
> @@ -141,8 +144,18 @@ static void idle_inject_fn(unsigned int cpu)
>          */
>         iit->should_run = 0;
>
> +       if (ii_dev->idle_inject_begin) {
> +               ret = ii_dev->idle_inject_begin(cpu);
> +               if (ret)
> +                       goto skip;
> +       }
> +
>         play_idle_precise(READ_ONCE(ii_dev->idle_duration_us) * NSEC_PER_USEC,
>                           READ_ONCE(ii_dev->latency_us) * NSEC_PER_USEC);
> +
> +skip:
> +       if (ii_dev->idle_inject_end)
> +               ii_dev->idle_inject_end(cpu);
>  }
>
>  /**
> @@ -302,7 +315,9 @@ static int idle_inject_should_run(unsigned int cpu)
>   * Return: NULL if memory allocation fails, idle injection control device
>   * pointer on success.
>   */
> -struct idle_inject_device *idle_inject_register(struct cpumask *cpumask)
> +struct idle_inject_device *idle_inject_register(struct cpumask *cpumask,
> +                                               int (*idle_inject_begin)(unsigned int cpu),
> +                                               void (*idle_inject_end)(unsigned int cpu))

Instead of modifying this, I would add something like
idle_inject_register_full() that will take the callback arguments and
will be called internally by idle_inject_register().

>  {
>         struct idle_inject_device *ii_dev;
>         int cpu, cpu_rb;
> @@ -315,6 +330,8 @@ struct idle_inject_device *idle_inject_register(struct cpumask *cpumask)
>         hrtimer_init(&ii_dev->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>         ii_dev->timer.function = idle_inject_timer_fn;
>         ii_dev->latency_us = UINT_MAX;
> +       ii_dev->idle_inject_begin = idle_inject_begin;
> +       ii_dev->idle_inject_end = idle_inject_end;
>
>         for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) {
>
> diff --git a/drivers/thermal/cpuidle_cooling.c b/drivers/thermal/cpuidle_cooling.c
> index 4f41102e8b16..e8b35b3b5767 100644
> --- a/drivers/thermal/cpuidle_cooling.c
> +++ b/drivers/thermal/cpuidle_cooling.c
> @@ -184,7 +184,7 @@ static int __cpuidle_cooling_register(struct device_node *np,
>                 goto out;
>         }
>
> -       ii_dev = idle_inject_register(drv->cpumask);
> +       ii_dev = idle_inject_register(drv->cpumask, NULL, NULL);

So this change would not be necessary any more.

>         if (!ii_dev) {
>                 ret = -EINVAL;
>                 goto out_kfree;
> diff --git a/include/linux/idle_inject.h b/include/linux/idle_inject.h
> index fb88e23a99d3..73f3414fafe2 100644
> --- a/include/linux/idle_inject.h
> +++ b/include/linux/idle_inject.h
> @@ -11,7 +11,9 @@
>  /* private idle injection device structure */
>  struct idle_inject_device;
>
> -struct idle_inject_device *idle_inject_register(struct cpumask *cpumask);
> +struct idle_inject_device *idle_inject_register(struct cpumask *cpumask,
> +                                               int (*idle_inject_begin)(unsigned int cpu),
> +                                               void (*idle_inject_end)(unsigned int cpu));
>
>  void idle_inject_unregister(struct idle_inject_device *ii_dev);
>
> --
  
srinivas pandruvada Nov. 10, 2022, 11:13 p.m. UTC | #2
On Wed, 2022-11-09 at 15:48 +0100, Rafael J. Wysocki wrote:
> On Tue, Nov 8, 2022 at 4:04 AM Srinivas Pandruvada
> <srinivas.pandruvada@linux.intel.com> wrote:
> > 
> > The actual CPU Idle percent can be different than what can be
> > observed
> > from the hardware.
> 
> Can you expand this a bit, please?  It is not clear what the "CPU
> idle
> percent" and "observed from the hardware" phrases mean here.
Sure.

> 
> > Since the objective for CPU Idle injection is for
> > thermal control, the idle percent observed by the hardware is more
> > relevant.
> > 
> > To account for hardware feedback the actual runtime/idle time
> > should be
> > adjusted.
> > 
> > Add a capability to register a begin and end callback during call
> > to
> 
> I would call them "prepare" and "complete" without the "idle_inject_"
> prefix.
OK

> 
> > idle_inject_register(). If they are not NULL, then begin callback
> > is
> > called before calling play_idle_precise() and end callback is
> > called
> > after play_idle_precise().
> > 
> > If begin callback is present and returns non 0 value then
> > play_idle_precise() is not called as it means there is some over
> > compensation.
> 
> This behavior needs to be documented somewhere other than the patch
> changelog.
We can add a kernel doc and add there.

> 
> > Signed-off-by: Srinivas Pandruvada <   
> > srinivas.pandruvada@linux.intel.com>
> > ---
> >  drivers/powercap/idle_inject.c    | 19 ++++++++++++++++++-
> >  drivers/thermal/cpuidle_cooling.c |  2 +-
> >  include/linux/idle_inject.h       |  4 +++-
> >  3 files changed, 22 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/powercap/idle_inject.c
> > b/drivers/powercap/idle_inject.c
> > index e73885bd9065..14968b0ff133 100644
> > --- a/drivers/powercap/idle_inject.c
> > +++ b/drivers/powercap/idle_inject.c
> > @@ -70,6 +70,8 @@ struct idle_inject_device {
> >         unsigned int idle_duration_us;
> >         unsigned int run_duration_us;
> >         unsigned int latency_us;
> > +       int (*idle_inject_begin)(unsigned int cpu);
> > +       void (*idle_inject_end)(unsigned int cpu);
> 
> The comment above needs to be updated.  Also please see the remark
> above regarding callback names.
> 
> >         unsigned long cpumask[];
> >  };
> > 
> > @@ -132,6 +134,7 @@ static void idle_inject_fn(unsigned int cpu)
> >  {
> >         struct idle_inject_device *ii_dev;
> >         struct idle_inject_thread *iit;
> > +       int ret;
> > 
> >         ii_dev = per_cpu(idle_inject_device, cpu);
> >         iit = per_cpu_ptr(&idle_inject_thread, cpu);
> > @@ -141,8 +144,18 @@ static void idle_inject_fn(unsigned int cpu)
> >          */
> >         iit->should_run = 0;
> > 
> > +       if (ii_dev->idle_inject_begin) {
> > +               ret = ii_dev->idle_inject_begin(cpu);
> > +               if (ret)
> > +                       goto skip;
> > +       }
> > +
> >         play_idle_precise(READ_ONCE(ii_dev->idle_duration_us) *
> > NSEC_PER_USEC,
> >                           READ_ONCE(ii_dev->latency_us) *
> > NSEC_PER_USEC);
> > +
> > +skip:
> > +       if (ii_dev->idle_inject_end)
> > +               ii_dev->idle_inject_end(cpu);
> >  }
> > 
> >  /**
> > @@ -302,7 +315,9 @@ static int idle_inject_should_run(unsigned int
> > cpu)
> >   * Return: NULL if memory allocation fails, idle injection control
> > device
> >   * pointer on success.
> >   */
> > -struct idle_inject_device *idle_inject_register(struct cpumask
> > *cpumask)
> > +struct idle_inject_device *idle_inject_register(struct cpumask
> > *cpumask,
> > +                                               int
> > (*idle_inject_begin)(unsigned int cpu),
> > +                                               void
> > (*idle_inject_end)(unsigned int cpu))
> 
> Instead of modifying this, I would add something like
> idle_inject_register_full() that will take the callback arguments and
> will be called internally by idle_inject_register().
OK

Thanks,
Srinivas

> 
> >  {
> >         struct idle_inject_device *ii_dev;
> >         int cpu, cpu_rb;
> > @@ -315,6 +330,8 @@ struct idle_inject_device
> > *idle_inject_register(struct cpumask *cpumask)
> >         hrtimer_init(&ii_dev->timer, CLOCK_MONOTONIC,
> > HRTIMER_MODE_REL);
> >         ii_dev->timer.function = idle_inject_timer_fn;
> >         ii_dev->latency_us = UINT_MAX;
> > +       ii_dev->idle_inject_begin = idle_inject_begin;
> > +       ii_dev->idle_inject_end = idle_inject_end;
> > 
> >         for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) {
> > 
> > diff --git a/drivers/thermal/cpuidle_cooling.c
> > b/drivers/thermal/cpuidle_cooling.c
> > index 4f41102e8b16..e8b35b3b5767 100644
> > --- a/drivers/thermal/cpuidle_cooling.c
> > +++ b/drivers/thermal/cpuidle_cooling.c
> > @@ -184,7 +184,7 @@ static int __cpuidle_cooling_register(struct
> > device_node *np,
> >                 goto out;
> >         }
> > 
> > -       ii_dev = idle_inject_register(drv->cpumask);
> > +       ii_dev = idle_inject_register(drv->cpumask, NULL, NULL);
> 
> So this change would not be necessary any more.
> 
> >         if (!ii_dev) {
> >                 ret = -EINVAL;
> >                 goto out_kfree;
> > diff --git a/include/linux/idle_inject.h
> > b/include/linux/idle_inject.h
> > index fb88e23a99d3..73f3414fafe2 100644
> > --- a/include/linux/idle_inject.h
> > +++ b/include/linux/idle_inject.h
> > @@ -11,7 +11,9 @@
> >  /* private idle injection device structure */
> >  struct idle_inject_device;
> > 
> > -struct idle_inject_device *idle_inject_register(struct cpumask
> > *cpumask);
> > +struct idle_inject_device *idle_inject_register(struct cpumask
> > *cpumask,
> > +                                               int
> > (*idle_inject_begin)(unsigned int cpu),
> > +                                               void
> > (*idle_inject_end)(unsigned int cpu));
> > 
> >  void idle_inject_unregister(struct idle_inject_device *ii_dev);
> > 
> > --
  

Patch

diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c
index e73885bd9065..14968b0ff133 100644
--- a/drivers/powercap/idle_inject.c
+++ b/drivers/powercap/idle_inject.c
@@ -70,6 +70,8 @@  struct idle_inject_device {
 	unsigned int idle_duration_us;
 	unsigned int run_duration_us;
 	unsigned int latency_us;
+	int (*idle_inject_begin)(unsigned int cpu);
+	void (*idle_inject_end)(unsigned int cpu);
 	unsigned long cpumask[];
 };
 
@@ -132,6 +134,7 @@  static void idle_inject_fn(unsigned int cpu)
 {
 	struct idle_inject_device *ii_dev;
 	struct idle_inject_thread *iit;
+	int ret;
 
 	ii_dev = per_cpu(idle_inject_device, cpu);
 	iit = per_cpu_ptr(&idle_inject_thread, cpu);
@@ -141,8 +144,18 @@  static void idle_inject_fn(unsigned int cpu)
 	 */
 	iit->should_run = 0;
 
+	if (ii_dev->idle_inject_begin) {
+		ret = ii_dev->idle_inject_begin(cpu);
+		if (ret)
+			goto skip;
+	}
+
 	play_idle_precise(READ_ONCE(ii_dev->idle_duration_us) * NSEC_PER_USEC,
 			  READ_ONCE(ii_dev->latency_us) * NSEC_PER_USEC);
+
+skip:
+	if (ii_dev->idle_inject_end)
+		ii_dev->idle_inject_end(cpu);
 }
 
 /**
@@ -302,7 +315,9 @@  static int idle_inject_should_run(unsigned int cpu)
  * Return: NULL if memory allocation fails, idle injection control device
  * pointer on success.
  */
-struct idle_inject_device *idle_inject_register(struct cpumask *cpumask)
+struct idle_inject_device *idle_inject_register(struct cpumask *cpumask,
+						int (*idle_inject_begin)(unsigned int cpu),
+						void (*idle_inject_end)(unsigned int cpu))
 {
 	struct idle_inject_device *ii_dev;
 	int cpu, cpu_rb;
@@ -315,6 +330,8 @@  struct idle_inject_device *idle_inject_register(struct cpumask *cpumask)
 	hrtimer_init(&ii_dev->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	ii_dev->timer.function = idle_inject_timer_fn;
 	ii_dev->latency_us = UINT_MAX;
+	ii_dev->idle_inject_begin = idle_inject_begin;
+	ii_dev->idle_inject_end = idle_inject_end;
 
 	for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) {
 
diff --git a/drivers/thermal/cpuidle_cooling.c b/drivers/thermal/cpuidle_cooling.c
index 4f41102e8b16..e8b35b3b5767 100644
--- a/drivers/thermal/cpuidle_cooling.c
+++ b/drivers/thermal/cpuidle_cooling.c
@@ -184,7 +184,7 @@  static int __cpuidle_cooling_register(struct device_node *np,
 		goto out;
 	}
 
-	ii_dev = idle_inject_register(drv->cpumask);
+	ii_dev = idle_inject_register(drv->cpumask, NULL, NULL);
 	if (!ii_dev) {
 		ret = -EINVAL;
 		goto out_kfree;
diff --git a/include/linux/idle_inject.h b/include/linux/idle_inject.h
index fb88e23a99d3..73f3414fafe2 100644
--- a/include/linux/idle_inject.h
+++ b/include/linux/idle_inject.h
@@ -11,7 +11,9 @@ 
 /* private idle injection device structure */
 struct idle_inject_device;
 
-struct idle_inject_device *idle_inject_register(struct cpumask *cpumask);
+struct idle_inject_device *idle_inject_register(struct cpumask *cpumask,
+						int (*idle_inject_begin)(unsigned int cpu),
+						void (*idle_inject_end)(unsigned int cpu));
 
 void idle_inject_unregister(struct idle_inject_device *ii_dev);