[v7,1/4] PM: Add sysfs files to represent time spent in hardware sleep state

Message ID 20230411211719.4549-2-mario.limonciello@amd.com
State New
Headers
Series Add vendor agnostic mechanism to report hardware sleep |

Commit Message

Mario Limonciello April 11, 2023, 9:17 p.m. UTC
  Userspace can't easily discover how much of a sleep cycle was spent in a
hardware sleep state without using kernel tracing and vendor specific sysfs
or debugfs files.

To make this information more discoverable, introduce two new sysfs files:
1) The time spent in a hw sleep state for last cycle.
2) The time spent in a hw sleep state since the kernel booted
Both of these files will be present only if the system supports s2idle.

Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
v6->v7:
 * Rename to max_hw_sleep (David E Box)
 * Drop overflow checks (David E Box)
v5->v6:
 * Add total attribute as well
 * Change text for documentation
 * Adjust flow of is_visible callback.
 * If overflow was detected in total attribute return -EOVERFLOW
 * Rename symbol
 * Add stub for symbol for builds without CONFIG_PM_SLEEP
v4->v5:
 * Provide time in microseconds instead of percent. Userspace can convert
   this if desirable.
---
 Documentation/ABI/testing/sysfs-power | 24 ++++++++++++++++
 include/linux/suspend.h               |  5 ++++
 kernel/power/main.c                   | 40 +++++++++++++++++++++++++++
 3 files changed, 69 insertions(+)
  

Comments

Hans de Goede April 11, 2023, 9:42 p.m. UTC | #1
Hi,

On 4/11/23 23:17, Mario Limonciello wrote:
> Userspace can't easily discover how much of a sleep cycle was spent in a
> hardware sleep state without using kernel tracing and vendor specific sysfs
> or debugfs files.
> 
> To make this information more discoverable, introduce two new sysfs files:
> 1) The time spent in a hw sleep state for last cycle.
> 2) The time spent in a hw sleep state since the kernel booted
> Both of these files will be present only if the system supports s2idle.
> 
> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
> ---
> v6->v7:
>  * Rename to max_hw_sleep (David E Box)
>  * Drop overflow checks (David E Box)
> v5->v6:
>  * Add total attribute as well
>  * Change text for documentation
>  * Adjust flow of is_visible callback.
>  * If overflow was detected in total attribute return -EOVERFLOW
>  * Rename symbol
>  * Add stub for symbol for builds without CONFIG_PM_SLEEP
> v4->v5:
>  * Provide time in microseconds instead of percent. Userspace can convert
>    this if desirable.
> ---
>  Documentation/ABI/testing/sysfs-power | 24 ++++++++++++++++
>  include/linux/suspend.h               |  5 ++++
>  kernel/power/main.c                   | 40 +++++++++++++++++++++++++++
>  3 files changed, 69 insertions(+)
> 
> diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power
> index f99d433ff311..0723b4dadfbe 100644
> --- a/Documentation/ABI/testing/sysfs-power
> +++ b/Documentation/ABI/testing/sysfs-power
> @@ -413,6 +413,30 @@ Description:
>  		The /sys/power/suspend_stats/last_failed_step file contains
>  		the last failed step in the suspend/resume path.
>  
> +What:		/sys/power/suspend_stats/last_hw_sleep
> +Date:		June 2023
> +Contact:	Mario Limonciello <mario.limonciello@amd.com>
> +Description:
> +		The /sys/power/suspend_stats/last_hw_sleep file
> +		contains the duration of time spent in a hardware sleep
> +		state in the most recent system suspend-resume cycle.
> +		This number is measured in microseconds.
> +
> +		NOTE: Limitations in the size of the hardware counters may
> +		cause this value to be inaccurate in longer sleep cycles.

Hmm I thought that the plan was to add a separate sysfs attr with
the max time that the hw could represent here, so that userspace
actually know what constitutes a "longer sleep cycle" ?

That would seem better then such a handwavy comment in the ABI docs?

> +
> +What:		/sys/power/suspend_stats/max_hw_sleep
> +Date:		June 2023
> +Contact:	Mario Limonciello <mario.limonciello@amd.com>
> +Description:
> +		The /sys/power/suspend_stats/max_hw_sleep file
> +		contains the aggregate of time spent in a hardware sleep
> +		state since the kernel was booted. This number
> +		is measured in microseconds.
> +
> +		NOTE: Limitations in the size of the hardware counters may
> +		cause this value to be inaccurate in longer sleep cycles.

Maybe "total_hw_sleep" instead of "max_hw_sleep" ? Also since max to
me sounds like the limit of the longest sleep the hw counters can
register, so that is bit confusing with the discussion about those
limits.

Regards,

Hans



> +
>  What:		/sys/power/sync_on_suspend
>  Date:		October 2019
>  Contact:	Jonas Meurer <jonas@freesources.org>
> diff --git a/include/linux/suspend.h b/include/linux/suspend.h
> index cfe19a028918..819e9677fd10 100644
> --- a/include/linux/suspend.h
> +++ b/include/linux/suspend.h
> @@ -68,6 +68,8 @@ struct suspend_stats {
>  	int	last_failed_errno;
>  	int	errno[REC_FAILED_NUM];
>  	int	last_failed_step;
> +	u64	last_hw_sleep;
> +	u64	max_hw_sleep;
>  	enum suspend_stat_step	failed_steps[REC_FAILED_NUM];
>  };
>  
> @@ -489,6 +491,7 @@ void restore_processor_state(void);
>  extern int register_pm_notifier(struct notifier_block *nb);
>  extern int unregister_pm_notifier(struct notifier_block *nb);
>  extern void ksys_sync_helper(void);
> +extern void pm_report_hw_sleep_time(u64 t);
>  
>  #define pm_notifier(fn, pri) {				\
>  	static struct notifier_block fn##_nb =			\
> @@ -526,6 +529,8 @@ static inline int unregister_pm_notifier(struct notifier_block *nb)
>  	return 0;
>  }
>  
> +static inline void pm_report_hw_sleep_time(u64 t) {};
> +
>  static inline void ksys_sync_helper(void) {}
>  
>  #define pm_notifier(fn, pri)	do { (void)(fn); } while (0)
> diff --git a/kernel/power/main.c b/kernel/power/main.c
> index 31ec4a9b9d70..a5546b91ecc9 100644
> --- a/kernel/power/main.c
> +++ b/kernel/power/main.c
> @@ -6,6 +6,7 @@
>   * Copyright (c) 2003 Open Source Development Lab
>   */
>  
> +#include <linux/acpi.h>
>  #include <linux/export.h>
>  #include <linux/kobject.h>
>  #include <linux/string.h>
> @@ -83,6 +84,13 @@ int unregister_pm_notifier(struct notifier_block *nb)
>  }
>  EXPORT_SYMBOL_GPL(unregister_pm_notifier);
>  
> +void pm_report_hw_sleep_time(u64 t)
> +{
> +	suspend_stats.last_hw_sleep = t;
> +	suspend_stats.max_hw_sleep += t;
> +}
> +EXPORT_SYMBOL_GPL(pm_report_hw_sleep_time);
> +
>  int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long val_down)
>  {
>  	int ret;
> @@ -377,6 +385,22 @@ static ssize_t last_failed_step_show(struct kobject *kobj,
>  }
>  static struct kobj_attribute last_failed_step = __ATTR_RO(last_failed_step);
>  
> +static ssize_t last_hw_sleep_show(struct kobject *kobj,
> +		struct kobj_attribute *attr, char *buf)
> +{
> +	return sysfs_emit(buf, "%llu\n", suspend_stats.last_hw_sleep);
> +}
> +static struct kobj_attribute last_hw_sleep = __ATTR_RO(last_hw_sleep);
> +
> +static ssize_t max_hw_sleep_show(struct kobject *kobj,
> +		struct kobj_attribute *attr, char *buf)
> +{
> +	if (suspend_stats.max_hw_sleep == -EOVERFLOW)
> +		return suspend_stats.max_hw_sleep;
> +	return sysfs_emit(buf, "%llu\n", suspend_stats.max_hw_sleep);
> +}
> +static struct kobj_attribute max_hw_sleep = __ATTR_RO(max_hw_sleep);
> +
>  static struct attribute *suspend_attrs[] = {
>  	&success.attr,
>  	&fail.attr,
> @@ -391,12 +415,28 @@ static struct attribute *suspend_attrs[] = {
>  	&last_failed_dev.attr,
>  	&last_failed_errno.attr,
>  	&last_failed_step.attr,
> +	&last_hw_sleep.attr,
> +	&max_hw_sleep.attr,
>  	NULL,
>  };
>  
> +static umode_t suspend_attr_is_visible(struct kobject *kobj, struct attribute *attr, int idx)
> +{
> +	if (attr != &last_hw_sleep.attr &&
> +	    attr != &max_hw_sleep.attr)
> +		return 0444;
> +
> +#ifdef CONFIG_ACPI
> +	if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)
> +		return 0444;
> +#endif
> +	return 0;
> +}
> +
>  static const struct attribute_group suspend_attr_group = {
>  	.name = "suspend_stats",
>  	.attrs = suspend_attrs,
> +	.is_visible = suspend_attr_is_visible,
>  };
>  
>  #ifdef CONFIG_DEBUG_FS
  
Mario Limonciello April 11, 2023, 9:49 p.m. UTC | #2
[Public]

> 
> On 4/11/23 23:17, Mario Limonciello wrote:
> > Userspace can't easily discover how much of a sleep cycle was spent in a
> > hardware sleep state without using kernel tracing and vendor specific sysfs
> > or debugfs files.
> >
> > To make this information more discoverable, introduce two new sysfs files:
> > 1) The time spent in a hw sleep state for last cycle.
> > 2) The time spent in a hw sleep state since the kernel booted
> > Both of these files will be present only if the system supports s2idle.
> >
> > Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
> > ---
> > v6->v7:
> >  * Rename to max_hw_sleep (David E Box)
> >  * Drop overflow checks (David E Box)
> > v5->v6:
> >  * Add total attribute as well
> >  * Change text for documentation
> >  * Adjust flow of is_visible callback.
> >  * If overflow was detected in total attribute return -EOVERFLOW
> >  * Rename symbol
> >  * Add stub for symbol for builds without CONFIG_PM_SLEEP
> > v4->v5:
> >  * Provide time in microseconds instead of percent. Userspace can convert
> >    this if desirable.
> > ---
> >  Documentation/ABI/testing/sysfs-power | 24 ++++++++++++++++
> >  include/linux/suspend.h               |  5 ++++
> >  kernel/power/main.c                   | 40 +++++++++++++++++++++++++++
> >  3 files changed, 69 insertions(+)
> >
> > diff --git a/Documentation/ABI/testing/sysfs-power
> b/Documentation/ABI/testing/sysfs-power
> > index f99d433ff311..0723b4dadfbe 100644
> > --- a/Documentation/ABI/testing/sysfs-power
> > +++ b/Documentation/ABI/testing/sysfs-power
> > @@ -413,6 +413,30 @@ Description:
> >  		The /sys/power/suspend_stats/last_failed_step file contains
> >  		the last failed step in the suspend/resume path.
> >
> > +What:		/sys/power/suspend_stats/last_hw_sleep
> > +Date:		June 2023
> > +Contact:	Mario Limonciello <mario.limonciello@amd.com>
> > +Description:
> > +		The /sys/power/suspend_stats/last_hw_sleep file
> > +		contains the duration of time spent in a hardware sleep
> > +		state in the most recent system suspend-resume cycle.
> > +		This number is measured in microseconds.
> > +
> > +		NOTE: Limitations in the size of the hardware counters may
> > +		cause this value to be inaccurate in longer sleep cycles.
> 
> Hmm I thought that the plan was to add a separate sysfs attr with
> the max time that the hw could represent here, so that userspace
> actually know what constitutes a "longer sleep cycle" ?
> 
> That would seem better then such a handwavy comment in the ABI docs?

I obviously misunderstood what you were suggesting.
I don't believe we have a way to programmatically determine what the hardware
Internally uses for it's counter to know this.

So it would need to be a table of some sorts that a given system can support
such value.  If we do that, we can actually know whether to return an error code
like -EOVERFLOW or -EINVAL too if the suspend was too long.

I would need Intel guys to share this information though which systems have
which size of counters to make this happen.

> 
> > +
> > +What:		/sys/power/suspend_stats/max_hw_sleep
> > +Date:		June 2023
> > +Contact:	Mario Limonciello <mario.limonciello@amd.com>
> > +Description:
> > +		The /sys/power/suspend_stats/max_hw_sleep file
> > +		contains the aggregate of time spent in a hardware sleep
> > +		state since the kernel was booted. This number
> > +		is measured in microseconds.
> > +
> > +		NOTE: Limitations in the size of the hardware counters may
> > +		cause this value to be inaccurate in longer sleep cycles.
> 
> Maybe "total_hw_sleep" instead of "max_hw_sleep" ? Also since max to
> me sounds like the limit of the longest sleep the hw counters can
> register, so that is bit confusing with the discussion about those
> limits.

total_hw_sleep is actually what was in v6 and max_hw_sleep is what suggested.
That's why I got confused about what you guys meant.

> 
> Regards,
> 
> Hans
> 
> 
> 
> > +
> >  What:		/sys/power/sync_on_suspend
> >  Date:		October 2019
> >  Contact:	Jonas Meurer <jonas@freesources.org>
> > diff --git a/include/linux/suspend.h b/include/linux/suspend.h
> > index cfe19a028918..819e9677fd10 100644
> > --- a/include/linux/suspend.h
> > +++ b/include/linux/suspend.h
> > @@ -68,6 +68,8 @@ struct suspend_stats {
> >  	int	last_failed_errno;
> >  	int	errno[REC_FAILED_NUM];
> >  	int	last_failed_step;
> > +	u64	last_hw_sleep;
> > +	u64	max_hw_sleep;
> >  	enum suspend_stat_step	failed_steps[REC_FAILED_NUM];
> >  };
> >
> > @@ -489,6 +491,7 @@ void restore_processor_state(void);
> >  extern int register_pm_notifier(struct notifier_block *nb);
> >  extern int unregister_pm_notifier(struct notifier_block *nb);
> >  extern void ksys_sync_helper(void);
> > +extern void pm_report_hw_sleep_time(u64 t);
> >
> >  #define pm_notifier(fn, pri) {				\
> >  	static struct notifier_block fn##_nb =			\
> > @@ -526,6 +529,8 @@ static inline int unregister_pm_notifier(struct
> notifier_block *nb)
> >  	return 0;
> >  }
> >
> > +static inline void pm_report_hw_sleep_time(u64 t) {};
> > +
> >  static inline void ksys_sync_helper(void) {}
> >
> >  #define pm_notifier(fn, pri)	do { (void)(fn); } while (0)
> > diff --git a/kernel/power/main.c b/kernel/power/main.c
> > index 31ec4a9b9d70..a5546b91ecc9 100644
> > --- a/kernel/power/main.c
> > +++ b/kernel/power/main.c
> > @@ -6,6 +6,7 @@
> >   * Copyright (c) 2003 Open Source Development Lab
> >   */
> >
> > +#include <linux/acpi.h>
> >  #include <linux/export.h>
> >  #include <linux/kobject.h>
> >  #include <linux/string.h>
> > @@ -83,6 +84,13 @@ int unregister_pm_notifier(struct notifier_block *nb)
> >  }
> >  EXPORT_SYMBOL_GPL(unregister_pm_notifier);
> >
> > +void pm_report_hw_sleep_time(u64 t)
> > +{
> > +	suspend_stats.last_hw_sleep = t;
> > +	suspend_stats.max_hw_sleep += t;
> > +}
> > +EXPORT_SYMBOL_GPL(pm_report_hw_sleep_time);
> > +
> >  int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long
> val_down)
> >  {
> >  	int ret;
> > @@ -377,6 +385,22 @@ static ssize_t last_failed_step_show(struct kobject
> *kobj,
> >  }
> >  static struct kobj_attribute last_failed_step =
> __ATTR_RO(last_failed_step);
> >
> > +static ssize_t last_hw_sleep_show(struct kobject *kobj,
> > +		struct kobj_attribute *attr, char *buf)
> > +{
> > +	return sysfs_emit(buf, "%llu\n", suspend_stats.last_hw_sleep);
> > +}
> > +static struct kobj_attribute last_hw_sleep = __ATTR_RO(last_hw_sleep);
> > +
> > +static ssize_t max_hw_sleep_show(struct kobject *kobj,
> > +		struct kobj_attribute *attr, char *buf)
> > +{
> > +	if (suspend_stats.max_hw_sleep == -EOVERFLOW)
> > +		return suspend_stats.max_hw_sleep;
> > +	return sysfs_emit(buf, "%llu\n", suspend_stats.max_hw_sleep);
> > +}
> > +static struct kobj_attribute max_hw_sleep =
> __ATTR_RO(max_hw_sleep);
> > +
> >  static struct attribute *suspend_attrs[] = {
> >  	&success.attr,
> >  	&fail.attr,
> > @@ -391,12 +415,28 @@ static struct attribute *suspend_attrs[] = {
> >  	&last_failed_dev.attr,
> >  	&last_failed_errno.attr,
> >  	&last_failed_step.attr,
> > +	&last_hw_sleep.attr,
> > +	&max_hw_sleep.attr,
> >  	NULL,
> >  };
> >
> > +static umode_t suspend_attr_is_visible(struct kobject *kobj, struct
> attribute *attr, int idx)
> > +{
> > +	if (attr != &last_hw_sleep.attr &&
> > +	    attr != &max_hw_sleep.attr)
> > +		return 0444;
> > +
> > +#ifdef CONFIG_ACPI
> > +	if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)
> > +		return 0444;
> > +#endif
> > +	return 0;
> > +}
> > +
> >  static const struct attribute_group suspend_attr_group = {
> >  	.name = "suspend_stats",
> >  	.attrs = suspend_attrs,
> > +	.is_visible = suspend_attr_is_visible,
> >  };
> >
> >  #ifdef CONFIG_DEBUG_FS
  
Box, David E April 12, 2023, 12:58 a.m. UTC | #3
Hi,

On Tue, 2023-04-11 at 21:49 +0000, Limonciello, Mario wrote:
> [Public]
> 
> > 
> > On 4/11/23 23:17, Mario Limonciello wrote:
> > > Userspace can't easily discover how much of a sleep cycle was spent in a
> > > hardware sleep state without using kernel tracing and vendor specific
> > > sysfs
> > > or debugfs files.
> > > 
> > > To make this information more discoverable, introduce two new sysfs files:
> > > 1) The time spent in a hw sleep state for last cycle.
> > > 2) The time spent in a hw sleep state since the kernel booted
> > > Both of these files will be present only if the system supports s2idle.
> > > 
> > > Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
> > > ---
> > > v6->v7:
> > >  * Rename to max_hw_sleep (David E Box)
> > >  * Drop overflow checks (David E Box)
> > > v5->v6:
> > >  * Add total attribute as well
> > >  * Change text for documentation
> > >  * Adjust flow of is_visible callback.
> > >  * If overflow was detected in total attribute return -EOVERFLOW
> > >  * Rename symbol
> > >  * Add stub for symbol for builds without CONFIG_PM_SLEEP
> > > v4->v5:
> > >  * Provide time in microseconds instead of percent. Userspace can convert
> > >    this if desirable.
> > > ---
> > >  Documentation/ABI/testing/sysfs-power | 24 ++++++++++++++++
> > >  include/linux/suspend.h               |  5 ++++
> > >  kernel/power/main.c                   | 40 +++++++++++++++++++++++++++
> > >  3 files changed, 69 insertions(+)
> > > 
> > > diff --git a/Documentation/ABI/testing/sysfs-power
> > b/Documentation/ABI/testing/sysfs-power
> > > index f99d433ff311..0723b4dadfbe 100644
> > > --- a/Documentation/ABI/testing/sysfs-power
> > > +++ b/Documentation/ABI/testing/sysfs-power
> > > @@ -413,6 +413,30 @@ Description:
> > >                 The /sys/power/suspend_stats/last_failed_step file
> > > contains
> > >                 the last failed step in the suspend/resume path.
> > > 
> > > +What:          /sys/power/suspend_stats/last_hw_sleep
> > > +Date:          June 2023
> > > +Contact:       Mario Limonciello <mario.limonciello@amd.com>
> > > +Description:
> > > +               The /sys/power/suspend_stats/last_hw_sleep file
> > > +               contains the duration of time spent in a hardware sleep
> > > +               state in the most recent system suspend-resume cycle.
> > > +               This number is measured in microseconds.
> > > +
> > > +               NOTE: Limitations in the size of the hardware counters may
> > > +               cause this value to be inaccurate in longer sleep cycles.
> > 
> > Hmm I thought that the plan was to add a separate sysfs attr with
> > the max time that the hw could represent here, so that userspace
> > actually know what constitutes a "longer sleep cycle" ?
> > 
> > That would seem better then such a handwavy comment in the ABI docs?
> 
> I obviously misunderstood what you were suggesting.
> I don't believe we have a way to programmatically determine what the hardware
> Internally uses for it's counter to know this.
> 
> So it would need to be a table of some sorts that a given system can support
> such value.  If we do that, we can actually know whether to return an error
> code
> like -EOVERFLOW or -EINVAL too if the suspend was too long.
> 
> I would need Intel guys to share this information though which systems have
> which size of counters to make this happen.

For Intel all the s0ix counters are 32 bit. If the maximum sleep time is
reported in microseconds it's just

        ((1UL << 32) - 1) * slp_s0_res_counter_step,

where slp_s0_res_counter_step is the platform specific counter granularity in
microseconds. There are some platform specific tweaks (of course). If you
provide a function to call, I can write the patch for intel_pmc_core.

> 
> > 
> > > +
> > > +What:          /sys/power/suspend_stats/max_hw_sleep
> > > +Date:          June 2023
> > > +Contact:       Mario Limonciello <mario.limonciello@amd.com>
> > > +Description:
> > > +               The /sys/power/suspend_stats/max_hw_sleep file
> > > +               contains the aggregate of time spent in a hardware sleep
> > > +               state since the kernel was booted. This number
> > > +               is measured in microseconds.
> > > +
> > > +               NOTE: Limitations in the size of the hardware counters may
> > > +               cause this value to be inaccurate in longer sleep cycles.
> > 
> > Maybe "total_hw_sleep" instead of "max_hw_sleep" ? Also since max to
> > me sounds like the limit of the longest sleep the hw counters can
> > register, so that is bit confusing with the discussion about those
> > limits.
> 
> total_hw_sleep is actually what was in v6 and max_hw_sleep is what suggested.
> That's why I got confused about what you guys meant.

Sorry, I meant max_hw_sleep for the additional attribute as Hans mentioned.

David

> 
> > 
> > Regards,
> > 
> > Hans
> > 
> > 
> > 
> > > +
> > >  What:          /sys/power/sync_on_suspend
> > >  Date:          October 2019
> > >  Contact:       Jonas Meurer <jonas@freesources.org>
> > > diff --git a/include/linux/suspend.h b/include/linux/suspend.h
> > > index cfe19a028918..819e9677fd10 100644
> > > --- a/include/linux/suspend.h
> > > +++ b/include/linux/suspend.h
> > > @@ -68,6 +68,8 @@ struct suspend_stats {
> > >         int     last_failed_errno;
> > >         int     errno[REC_FAILED_NUM];
> > >         int     last_failed_step;
> > > +       u64     last_hw_sleep;
> > > +       u64     max_hw_sleep;
> > >         enum suspend_stat_step  failed_steps[REC_FAILED_NUM];
> > >  };
> > > 
> > > @@ -489,6 +491,7 @@ void restore_processor_state(void);
> > >  extern int register_pm_notifier(struct notifier_block *nb);
> > >  extern int unregister_pm_notifier(struct notifier_block *nb);
> > >  extern void ksys_sync_helper(void);
> > > +extern void pm_report_hw_sleep_time(u64 t);
> > > 
> > >  #define pm_notifier(fn, pri) {                         \
> > >         static struct notifier_block fn##_nb =                  \
> > > @@ -526,6 +529,8 @@ static inline int unregister_pm_notifier(struct
> > notifier_block *nb)
> > >         return 0;
> > >  }
> > > 
> > > +static inline void pm_report_hw_sleep_time(u64 t) {};
> > > +
> > >  static inline void ksys_sync_helper(void) {}
> > > 
> > >  #define pm_notifier(fn, pri)   do { (void)(fn); } while (0)
> > > diff --git a/kernel/power/main.c b/kernel/power/main.c
> > > index 31ec4a9b9d70..a5546b91ecc9 100644
> > > --- a/kernel/power/main.c
> > > +++ b/kernel/power/main.c
> > > @@ -6,6 +6,7 @@
> > >   * Copyright (c) 2003 Open Source Development Lab
> > >   */
> > > 
> > > +#include <linux/acpi.h>
> > >  #include <linux/export.h>
> > >  #include <linux/kobject.h>
> > >  #include <linux/string.h>
> > > @@ -83,6 +84,13 @@ int unregister_pm_notifier(struct notifier_block *nb)
> > >  }
> > >  EXPORT_SYMBOL_GPL(unregister_pm_notifier);
> > > 
> > > +void pm_report_hw_sleep_time(u64 t)
> > > +{
> > > +       suspend_stats.last_hw_sleep = t;
> > > +       suspend_stats.max_hw_sleep += t;
> > > +}
> > > +EXPORT_SYMBOL_GPL(pm_report_hw_sleep_time);
> > > +
> > >  int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long
> > val_down)
> > >  {
> > >         int ret;
> > > @@ -377,6 +385,22 @@ static ssize_t last_failed_step_show(struct kobject
> > *kobj,
> > >  }
> > >  static struct kobj_attribute last_failed_step =
> > __ATTR_RO(last_failed_step);
> > > 
> > > +static ssize_t last_hw_sleep_show(struct kobject *kobj,
> > > +               struct kobj_attribute *attr, char *buf)
> > > +{
> > > +       return sysfs_emit(buf, "%llu\n", suspend_stats.last_hw_sleep);
> > > +}
> > > +static struct kobj_attribute last_hw_sleep = __ATTR_RO(last_hw_sleep);
> > > +
> > > +static ssize_t max_hw_sleep_show(struct kobject *kobj,
> > > +               struct kobj_attribute *attr, char *buf)
> > > +{
> > > +       if (suspend_stats.max_hw_sleep == -EOVERFLOW)
> > > +               return suspend_stats.max_hw_sleep;
> > > +       return sysfs_emit(buf, "%llu\n", suspend_stats.max_hw_sleep);
> > > +}
> > > +static struct kobj_attribute max_hw_sleep =
> > __ATTR_RO(max_hw_sleep);
> > > +
> > >  static struct attribute *suspend_attrs[] = {
> > >         &success.attr,
> > >         &fail.attr,
> > > @@ -391,12 +415,28 @@ static struct attribute *suspend_attrs[] = {
> > >         &last_failed_dev.attr,
> > >         &last_failed_errno.attr,
> > >         &last_failed_step.attr,
> > > +       &last_hw_sleep.attr,
> > > +       &max_hw_sleep.attr,
> > >         NULL,
> > >  };
> > > 
> > > +static umode_t suspend_attr_is_visible(struct kobject *kobj, struct
> > attribute *attr, int idx)
> > > +{
> > > +       if (attr != &last_hw_sleep.attr &&
> > > +           attr != &max_hw_sleep.attr)
> > > +               return 0444;
> > > +
> > > +#ifdef CONFIG_ACPI
> > > +       if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)
> > > +               return 0444;
> > > +#endif
> > > +       return 0;
> > > +}
> > > +
> > >  static const struct attribute_group suspend_attr_group = {
> > >         .name = "suspend_stats",
> > >         .attrs = suspend_attrs,
> > > +       .is_visible = suspend_attr_is_visible,
> > >  };
> > > 
> > >  #ifdef CONFIG_DEBUG_FS
  
Hans de Goede April 12, 2023, 8:47 a.m. UTC | #4
Hi all,

On 4/12/23 02:58, Box, David E wrote:
> Hi,
> 
> On Tue, 2023-04-11 at 21:49 +0000, Limonciello, Mario wrote:
>> [Public]
>>
>>>
>>> On 4/11/23 23:17, Mario Limonciello wrote:
>>>> Userspace can't easily discover how much of a sleep cycle was spent in a
>>>> hardware sleep state without using kernel tracing and vendor specific
>>>> sysfs
>>>> or debugfs files.
>>>>
>>>> To make this information more discoverable, introduce two new sysfs files:
>>>> 1) The time spent in a hw sleep state for last cycle.
>>>> 2) The time spent in a hw sleep state since the kernel booted
>>>> Both of these files will be present only if the system supports s2idle.
>>>>
>>>> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
>>>> ---
>>>> v6->v7:
>>>>  * Rename to max_hw_sleep (David E Box)
>>>>  * Drop overflow checks (David E Box)
>>>> v5->v6:
>>>>  * Add total attribute as well
>>>>  * Change text for documentation
>>>>  * Adjust flow of is_visible callback.
>>>>  * If overflow was detected in total attribute return -EOVERFLOW
>>>>  * Rename symbol
>>>>  * Add stub for symbol for builds without CONFIG_PM_SLEEP
>>>> v4->v5:
>>>>  * Provide time in microseconds instead of percent. Userspace can convert
>>>>    this if desirable.
>>>> ---
>>>>  Documentation/ABI/testing/sysfs-power | 24 ++++++++++++++++
>>>>  include/linux/suspend.h               |  5 ++++
>>>>  kernel/power/main.c                   | 40 +++++++++++++++++++++++++++
>>>>  3 files changed, 69 insertions(+)
>>>>
>>>> diff --git a/Documentation/ABI/testing/sysfs-power
>>> b/Documentation/ABI/testing/sysfs-power
>>>> index f99d433ff311..0723b4dadfbe 100644
>>>> --- a/Documentation/ABI/testing/sysfs-power
>>>> +++ b/Documentation/ABI/testing/sysfs-power
>>>> @@ -413,6 +413,30 @@ Description:
>>>>                 The /sys/power/suspend_stats/last_failed_step file
>>>> contains
>>>>                 the last failed step in the suspend/resume path.
>>>>
>>>> +What:          /sys/power/suspend_stats/last_hw_sleep
>>>> +Date:          June 2023
>>>> +Contact:       Mario Limonciello <mario.limonciello@amd.com>
>>>> +Description:
>>>> +               The /sys/power/suspend_stats/last_hw_sleep file
>>>> +               contains the duration of time spent in a hardware sleep
>>>> +               state in the most recent system suspend-resume cycle.
>>>> +               This number is measured in microseconds.
>>>> +
>>>> +               NOTE: Limitations in the size of the hardware counters may
>>>> +               cause this value to be inaccurate in longer sleep cycles.
>>>
>>> Hmm I thought that the plan was to add a separate sysfs attr with
>>> the max time that the hw could represent here, so that userspace
>>> actually know what constitutes a "longer sleep cycle" ?
>>>
>>> That would seem better then such a handwavy comment in the ABI docs?
>>
>> I obviously misunderstood what you were suggesting.
>> I don't believe we have a way to programmatically determine what the hardware
>> Internally uses for it's counter to know this.
>>
>> So it would need to be a table of some sorts that a given system can support
>> such value.  If we do that, we can actually know whether to return an error
>> code
>> like -EOVERFLOW or -EINVAL too if the suspend was too long.
>>
>> I would need Intel guys to share this information though which systems have
>> which size of counters to make this happen.
> 
> For Intel all the s0ix counters are 32 bit. If the maximum sleep time is
> reported in microseconds it's just
> 
>         ((1UL << 32) - 1) * slp_s0_res_counter_step,
> 
> where slp_s0_res_counter_step is the platform specific counter granularity in
> microseconds. There are some platform specific tweaks (of course). If you
> provide a function to call, I can write the patch for intel_pmc_core.

FWIW the above plan sounds go to me. 

Regards,

Hans





>>>> +What:          /sys/power/suspend_stats/max_hw_sleep
>>>> +Date:          June 2023
>>>> +Contact:       Mario Limonciello <mario.limonciello@amd.com>
>>>> +Description:
>>>> +               The /sys/power/suspend_stats/max_hw_sleep file
>>>> +               contains the aggregate of time spent in a hardware sleep
>>>> +               state since the kernel was booted. This number
>>>> +               is measured in microseconds.
>>>> +
>>>> +               NOTE: Limitations in the size of the hardware counters may
>>>> +               cause this value to be inaccurate in longer sleep cycles.
>>>
>>> Maybe "total_hw_sleep" instead of "max_hw_sleep" ? Also since max to
>>> me sounds like the limit of the longest sleep the hw counters can
>>> register, so that is bit confusing with the discussion about those
>>> limits.
>>
>> total_hw_sleep is actually what was in v6 and max_hw_sleep is what suggested.
>> That's why I got confused about what you guys meant.
> 
> Sorry, I meant max_hw_sleep for the additional attribute as Hans mentioned.
> 
> David
> 
>>
>>>
>>> Regards,
>>>
>>> Hans
>>>
>>>
>>>
>>>> +
>>>>  What:          /sys/power/sync_on_suspend
>>>>  Date:          October 2019
>>>>  Contact:       Jonas Meurer <jonas@freesources.org>
>>>> diff --git a/include/linux/suspend.h b/include/linux/suspend.h
>>>> index cfe19a028918..819e9677fd10 100644
>>>> --- a/include/linux/suspend.h
>>>> +++ b/include/linux/suspend.h
>>>> @@ -68,6 +68,8 @@ struct suspend_stats {
>>>>         int     last_failed_errno;
>>>>         int     errno[REC_FAILED_NUM];
>>>>         int     last_failed_step;
>>>> +       u64     last_hw_sleep;
>>>> +       u64     max_hw_sleep;
>>>>         enum suspend_stat_step  failed_steps[REC_FAILED_NUM];
>>>>  };
>>>>
>>>> @@ -489,6 +491,7 @@ void restore_processor_state(void);
>>>>  extern int register_pm_notifier(struct notifier_block *nb);
>>>>  extern int unregister_pm_notifier(struct notifier_block *nb);
>>>>  extern void ksys_sync_helper(void);
>>>> +extern void pm_report_hw_sleep_time(u64 t);
>>>>
>>>>  #define pm_notifier(fn, pri) {                         \
>>>>         static struct notifier_block fn##_nb =                  \
>>>> @@ -526,6 +529,8 @@ static inline int unregister_pm_notifier(struct
>>> notifier_block *nb)
>>>>         return 0;
>>>>  }
>>>>
>>>> +static inline void pm_report_hw_sleep_time(u64 t) {};
>>>> +
>>>>  static inline void ksys_sync_helper(void) {}
>>>>
>>>>  #define pm_notifier(fn, pri)   do { (void)(fn); } while (0)
>>>> diff --git a/kernel/power/main.c b/kernel/power/main.c
>>>> index 31ec4a9b9d70..a5546b91ecc9 100644
>>>> --- a/kernel/power/main.c
>>>> +++ b/kernel/power/main.c
>>>> @@ -6,6 +6,7 @@
>>>>   * Copyright (c) 2003 Open Source Development Lab
>>>>   */
>>>>
>>>> +#include <linux/acpi.h>
>>>>  #include <linux/export.h>
>>>>  #include <linux/kobject.h>
>>>>  #include <linux/string.h>
>>>> @@ -83,6 +84,13 @@ int unregister_pm_notifier(struct notifier_block *nb)
>>>>  }
>>>>  EXPORT_SYMBOL_GPL(unregister_pm_notifier);
>>>>
>>>> +void pm_report_hw_sleep_time(u64 t)
>>>> +{
>>>> +       suspend_stats.last_hw_sleep = t;
>>>> +       suspend_stats.max_hw_sleep += t;
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(pm_report_hw_sleep_time);
>>>> +
>>>>  int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long
>>> val_down)
>>>>  {
>>>>         int ret;
>>>> @@ -377,6 +385,22 @@ static ssize_t last_failed_step_show(struct kobject
>>> *kobj,
>>>>  }
>>>>  static struct kobj_attribute last_failed_step =
>>> __ATTR_RO(last_failed_step);
>>>>
>>>> +static ssize_t last_hw_sleep_show(struct kobject *kobj,
>>>> +               struct kobj_attribute *attr, char *buf)
>>>> +{
>>>> +       return sysfs_emit(buf, "%llu\n", suspend_stats.last_hw_sleep);
>>>> +}
>>>> +static struct kobj_attribute last_hw_sleep = __ATTR_RO(last_hw_sleep);
>>>> +
>>>> +static ssize_t max_hw_sleep_show(struct kobject *kobj,
>>>> +               struct kobj_attribute *attr, char *buf)
>>>> +{
>>>> +       if (suspend_stats.max_hw_sleep == -EOVERFLOW)
>>>> +               return suspend_stats.max_hw_sleep;
>>>> +       return sysfs_emit(buf, "%llu\n", suspend_stats.max_hw_sleep);
>>>> +}
>>>> +static struct kobj_attribute max_hw_sleep =
>>> __ATTR_RO(max_hw_sleep);
>>>> +
>>>>  static struct attribute *suspend_attrs[] = {
>>>>         &success.attr,
>>>>         &fail.attr,
>>>> @@ -391,12 +415,28 @@ static struct attribute *suspend_attrs[] = {
>>>>         &last_failed_dev.attr,
>>>>         &last_failed_errno.attr,
>>>>         &last_failed_step.attr,
>>>> +       &last_hw_sleep.attr,
>>>> +       &max_hw_sleep.attr,
>>>>         NULL,
>>>>  };
>>>>
>>>> +static umode_t suspend_attr_is_visible(struct kobject *kobj, struct
>>> attribute *attr, int idx)
>>>> +{
>>>> +       if (attr != &last_hw_sleep.attr &&
>>>> +           attr != &max_hw_sleep.attr)
>>>> +               return 0444;
>>>> +
>>>> +#ifdef CONFIG_ACPI
>>>> +       if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)
>>>> +               return 0444;
>>>> +#endif
>>>> +       return 0;
>>>> +}
>>>> +
>>>>  static const struct attribute_group suspend_attr_group = {
>>>>         .name = "suspend_stats",
>>>>         .attrs = suspend_attrs,
>>>> +       .is_visible = suspend_attr_is_visible,
>>>>  };
>>>>
>>>>  #ifdef CONFIG_DEBUG_FS
>
  

Patch

diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power
index f99d433ff311..0723b4dadfbe 100644
--- a/Documentation/ABI/testing/sysfs-power
+++ b/Documentation/ABI/testing/sysfs-power
@@ -413,6 +413,30 @@  Description:
 		The /sys/power/suspend_stats/last_failed_step file contains
 		the last failed step in the suspend/resume path.
 
+What:		/sys/power/suspend_stats/last_hw_sleep
+Date:		June 2023
+Contact:	Mario Limonciello <mario.limonciello@amd.com>
+Description:
+		The /sys/power/suspend_stats/last_hw_sleep file
+		contains the duration of time spent in a hardware sleep
+		state in the most recent system suspend-resume cycle.
+		This number is measured in microseconds.
+
+		NOTE: Limitations in the size of the hardware counters may
+		cause this value to be inaccurate in longer sleep cycles.
+
+What:		/sys/power/suspend_stats/max_hw_sleep
+Date:		June 2023
+Contact:	Mario Limonciello <mario.limonciello@amd.com>
+Description:
+		The /sys/power/suspend_stats/max_hw_sleep file
+		contains the aggregate of time spent in a hardware sleep
+		state since the kernel was booted. This number
+		is measured in microseconds.
+
+		NOTE: Limitations in the size of the hardware counters may
+		cause this value to be inaccurate in longer sleep cycles.
+
 What:		/sys/power/sync_on_suspend
 Date:		October 2019
 Contact:	Jonas Meurer <jonas@freesources.org>
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index cfe19a028918..819e9677fd10 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -68,6 +68,8 @@  struct suspend_stats {
 	int	last_failed_errno;
 	int	errno[REC_FAILED_NUM];
 	int	last_failed_step;
+	u64	last_hw_sleep;
+	u64	max_hw_sleep;
 	enum suspend_stat_step	failed_steps[REC_FAILED_NUM];
 };
 
@@ -489,6 +491,7 @@  void restore_processor_state(void);
 extern int register_pm_notifier(struct notifier_block *nb);
 extern int unregister_pm_notifier(struct notifier_block *nb);
 extern void ksys_sync_helper(void);
+extern void pm_report_hw_sleep_time(u64 t);
 
 #define pm_notifier(fn, pri) {				\
 	static struct notifier_block fn##_nb =			\
@@ -526,6 +529,8 @@  static inline int unregister_pm_notifier(struct notifier_block *nb)
 	return 0;
 }
 
+static inline void pm_report_hw_sleep_time(u64 t) {};
+
 static inline void ksys_sync_helper(void) {}
 
 #define pm_notifier(fn, pri)	do { (void)(fn); } while (0)
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 31ec4a9b9d70..a5546b91ecc9 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -6,6 +6,7 @@ 
  * Copyright (c) 2003 Open Source Development Lab
  */
 
+#include <linux/acpi.h>
 #include <linux/export.h>
 #include <linux/kobject.h>
 #include <linux/string.h>
@@ -83,6 +84,13 @@  int unregister_pm_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL_GPL(unregister_pm_notifier);
 
+void pm_report_hw_sleep_time(u64 t)
+{
+	suspend_stats.last_hw_sleep = t;
+	suspend_stats.max_hw_sleep += t;
+}
+EXPORT_SYMBOL_GPL(pm_report_hw_sleep_time);
+
 int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long val_down)
 {
 	int ret;
@@ -377,6 +385,22 @@  static ssize_t last_failed_step_show(struct kobject *kobj,
 }
 static struct kobj_attribute last_failed_step = __ATTR_RO(last_failed_step);
 
+static ssize_t last_hw_sleep_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%llu\n", suspend_stats.last_hw_sleep);
+}
+static struct kobj_attribute last_hw_sleep = __ATTR_RO(last_hw_sleep);
+
+static ssize_t max_hw_sleep_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	if (suspend_stats.max_hw_sleep == -EOVERFLOW)
+		return suspend_stats.max_hw_sleep;
+	return sysfs_emit(buf, "%llu\n", suspend_stats.max_hw_sleep);
+}
+static struct kobj_attribute max_hw_sleep = __ATTR_RO(max_hw_sleep);
+
 static struct attribute *suspend_attrs[] = {
 	&success.attr,
 	&fail.attr,
@@ -391,12 +415,28 @@  static struct attribute *suspend_attrs[] = {
 	&last_failed_dev.attr,
 	&last_failed_errno.attr,
 	&last_failed_step.attr,
+	&last_hw_sleep.attr,
+	&max_hw_sleep.attr,
 	NULL,
 };
 
+static umode_t suspend_attr_is_visible(struct kobject *kobj, struct attribute *attr, int idx)
+{
+	if (attr != &last_hw_sleep.attr &&
+	    attr != &max_hw_sleep.attr)
+		return 0444;
+
+#ifdef CONFIG_ACPI
+	if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)
+		return 0444;
+#endif
+	return 0;
+}
+
 static const struct attribute_group suspend_attr_group = {
 	.name = "suspend_stats",
 	.attrs = suspend_attrs,
+	.is_visible = suspend_attr_is_visible,
 };
 
 #ifdef CONFIG_DEBUG_FS