[V4,16/17] platform/x86/intel/pmc: Add debug attribute for Die C6 counter
Commit Message
Add a "die_c6_us_show" debugfs attribute. Reads the counter value using
Intel Platform Monitoring Technology (PMT) driver API. This counter is
useful for determining the idle residency of CPUs in the compute tile.
Signed-off-by: David E. Box <david.e.box@linux.intel.com>
---
V4 - no change
V3 - Split previous PATCH V2 13. Separates implementation (this patch) from
platform specific use (next patch)
V2 - Remove use of __func__
- Use HZ_PER_MHZ
- Fix missing newlines in printks
drivers/platform/x86/intel/pmc/core.c | 55 +++++++++++++++++++++++++++
drivers/platform/x86/intel/pmc/core.h | 4 ++
2 files changed, 59 insertions(+)
Comments
On Wed, 18 Oct 2023, David E. Box wrote:
> Add a "die_c6_us_show" debugfs attribute. Reads the counter value using
> Intel Platform Monitoring Technology (PMT) driver API. This counter is
> useful for determining the idle residency of CPUs in the compute tile.
>
> Signed-off-by: David E. Box <david.e.box@linux.intel.com>
> ---
> V4 - no change
>
> V3 - Split previous PATCH V2 13. Separates implementation (this patch) from
> platform specific use (next patch)
>
> V2 - Remove use of __func__
> - Use HZ_PER_MHZ
> - Fix missing newlines in printks
>
> drivers/platform/x86/intel/pmc/core.c | 55 +++++++++++++++++++++++++++
> drivers/platform/x86/intel/pmc/core.h | 4 ++
> 2 files changed, 59 insertions(+)
>
> diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c
> index fcb0dc702aea..02f3e909cf22 100644
> --- a/drivers/platform/x86/intel/pmc/core.c
> +++ b/drivers/platform/x86/intel/pmc/core.c
> @@ -20,6 +20,7 @@
> #include <linux/pci.h>
> #include <linux/slab.h>
> #include <linux/suspend.h>
> +#include <linux/units.h>
>
> #include <asm/cpu_device_id.h>
> #include <asm/intel-family.h>
> @@ -27,6 +28,7 @@
> #include <asm/tsc.h>
>
> #include "core.h"
> +#include "../pmt/telemetry.h"
>
> /* Maximum number of modes supported by platfoms that has low power mode capability */
> const char *pmc_lpm_modes[] = {
> @@ -822,6 +824,47 @@ static int pmc_core_substate_req_regs_show(struct seq_file *s, void *unused)
> }
> DEFINE_SHOW_ATTRIBUTE(pmc_core_substate_req_regs);
>
> +static unsigned int pmc_core_get_crystal_freq(void)
> +{
> + unsigned int eax_denominator, ebx_numerator, ecx_hz, edx;
> +
> + if (boot_cpu_data.cpuid_level < 0x15)
> + return 0;
> +
> + eax_denominator = ebx_numerator = ecx_hz = edx = 0;
> +
> + /* CPUID 15H TSC/Crystal ratio, plus optionally Crystal Hz */
> + cpuid(0x15, &eax_denominator, &ebx_numerator, &ecx_hz, &edx);
> +
> + if (ebx_numerator == 0 || eax_denominator == 0)
> + return 0;
> +
> + return ecx_hz;
> +}
> +
> +static int pmc_core_die_c6_us_show(struct seq_file *s, void *unused)
> +{
> + struct pmc_dev *pmcdev = s->private;
> + u64 die_c6_res, count;
> + int ret;
> +
> + if (!pmcdev->crystal_freq) {
> + dev_warn_once(&pmcdev->pdev->dev, "Bad crystal frequency\n");
Isn't it more like crystal frequency is not provided rather than bad
frequency?
> + return -EINVAL;
-EINVAL is not good value to return here since there was nothing wrong
with the input. Maybe -ENXIO would be better.
> + }
> +
> + ret = pmt_telem_read(pmcdev->punit_ep, pmcdev->die_c6_offset,
> + &count, 1);
> + if (ret)
> + return ret;
> +
> + die_c6_res = div64_u64(count * HZ_PER_MHZ, pmcdev->crystal_freq);
> + seq_printf(s, "%llu\n", die_c6_res);
> +
> + return 0;
> +}
> +DEFINE_SHOW_ATTRIBUTE(pmc_core_die_c6_us);
> +
> static int pmc_core_lpm_latch_mode_show(struct seq_file *s, void *unused)
> {
> struct pmc_dev *pmcdev = s->private;
> @@ -1118,6 +1161,12 @@ static void pmc_core_dbgfs_register(struct pmc_dev *pmcdev)
> pmcdev->dbgfs_dir, pmcdev,
> &pmc_core_substate_req_regs_fops);
> }
> +
> + if (pmcdev->has_die_c6) {
> + debugfs_create_file("die_c6_us_show", 0444,
> + pmcdev->dbgfs_dir, pmcdev,
> + &pmc_core_die_c6_us_fops);
> + }
> }
>
> static const struct x86_cpu_id intel_pmc_core_ids[] = {
> @@ -1212,6 +1261,10 @@ static void pmc_core_clean_structure(struct platform_device *pdev)
> pci_dev_put(pmcdev->ssram_pcidev);
> pci_disable_device(pmcdev->ssram_pcidev);
> }
> +
> + if (pmcdev->punit_ep)
> + pmt_telem_unregister_endpoint(pmcdev->punit_ep);
> +
> platform_set_drvdata(pdev, NULL);
> mutex_destroy(&pmcdev->lock);
> }
> @@ -1232,6 +1285,8 @@ static int pmc_core_probe(struct platform_device *pdev)
> if (!pmcdev)
> return -ENOMEM;
>
> + pmcdev->crystal_freq = pmc_core_get_crystal_freq();
> +
> platform_set_drvdata(pdev, pmcdev);
> pmcdev->pdev = pdev;
>
> diff --git a/drivers/platform/x86/intel/pmc/core.h b/drivers/platform/x86/intel/pmc/core.h
> index 85b6f6ae4995..6d7673145f90 100644
> --- a/drivers/platform/x86/intel/pmc/core.h
> +++ b/drivers/platform/x86/intel/pmc/core.h
> @@ -16,6 +16,8 @@
> #include <linux/bits.h>
> #include <linux/platform_device.h>
>
> +struct telem_endpoint;
> +
This seems unrelated to the patch.
On Mon, 2023-10-23 at 19:31 +0300, Ilpo Järvinen wrote:
> On Wed, 18 Oct 2023, David E. Box wrote:
>
> > Add a "die_c6_us_show" debugfs attribute. Reads the counter value using
> > Intel Platform Monitoring Technology (PMT) driver API. This counter is
> > useful for determining the idle residency of CPUs in the compute tile.
> >
> > Signed-off-by: David E. Box <david.e.box@linux.intel.com>
> > ---
> > V4 - no change
> >
> > V3 - Split previous PATCH V2 13. Separates implementation (this patch) from
> > platform specific use (next patch)
> >
> > V2 - Remove use of __func__
> > - Use HZ_PER_MHZ
> > - Fix missing newlines in printks
> >
> > drivers/platform/x86/intel/pmc/core.c | 55 +++++++++++++++++++++++++++
> > drivers/platform/x86/intel/pmc/core.h | 4 ++
> > 2 files changed, 59 insertions(+)
> >
> > diff --git a/drivers/platform/x86/intel/pmc/core.c
> > b/drivers/platform/x86/intel/pmc/core.c
> > index fcb0dc702aea..02f3e909cf22 100644
> > --- a/drivers/platform/x86/intel/pmc/core.c
> > +++ b/drivers/platform/x86/intel/pmc/core.c
> > @@ -20,6 +20,7 @@
> > #include <linux/pci.h>
> > #include <linux/slab.h>
> > #include <linux/suspend.h>
> > +#include <linux/units.h>
> >
> > #include <asm/cpu_device_id.h>
> > #include <asm/intel-family.h>
> > @@ -27,6 +28,7 @@
> > #include <asm/tsc.h>
> >
> > #include "core.h"
> > +#include "../pmt/telemetry.h"
> >
> > /* Maximum number of modes supported by platfoms that has low power mode
> > capability */
> > const char *pmc_lpm_modes[] = {
> > @@ -822,6 +824,47 @@ static int pmc_core_substate_req_regs_show(struct
> > seq_file *s, void *unused)
> > }
> > DEFINE_SHOW_ATTRIBUTE(pmc_core_substate_req_regs);
> >
> > +static unsigned int pmc_core_get_crystal_freq(void)
> > +{
> > + unsigned int eax_denominator, ebx_numerator, ecx_hz, edx;
> > +
> > + if (boot_cpu_data.cpuid_level < 0x15)
> > + return 0;
> > +
> > + eax_denominator = ebx_numerator = ecx_hz = edx = 0;
> > +
> > + /* CPUID 15H TSC/Crystal ratio, plus optionally Crystal Hz */
> > + cpuid(0x15, &eax_denominator, &ebx_numerator, &ecx_hz, &edx);
> > +
> > + if (ebx_numerator == 0 || eax_denominator == 0)
> > + return 0;
> > +
> > + return ecx_hz;
> > +}
> > +
> > +static int pmc_core_die_c6_us_show(struct seq_file *s, void *unused)
> > +{
> > + struct pmc_dev *pmcdev = s->private;
> > + u64 die_c6_res, count;
> > + int ret;
> > +
> > + if (!pmcdev->crystal_freq) {
> > + dev_warn_once(&pmcdev->pdev->dev, "Bad crystal
> > frequency\n");
>
> Isn't it more like crystal frequency is not provided rather than bad
> frequency?
>
> > + return -EINVAL;
>
> -EINVAL is not good value to return here since there was nothing wrong
> with the input. Maybe -ENXIO would be better.
Will make these changes.
>
> > + }
> > +
> > + ret = pmt_telem_read(pmcdev->punit_ep, pmcdev->die_c6_offset,
> > + &count, 1);
> > + if (ret)
> > + return ret;
> > +
> > + die_c6_res = div64_u64(count * HZ_PER_MHZ, pmcdev->crystal_freq);
> > + seq_printf(s, "%llu\n", die_c6_res);
> > +
> > + return 0;
> > +}
> > +DEFINE_SHOW_ATTRIBUTE(pmc_core_die_c6_us);
> > +
> > static int pmc_core_lpm_latch_mode_show(struct seq_file *s, void *unused)
> > {
> > struct pmc_dev *pmcdev = s->private;
> > @@ -1118,6 +1161,12 @@ static void pmc_core_dbgfs_register(struct pmc_dev
> > *pmcdev)
> > pmcdev->dbgfs_dir, pmcdev,
> > &pmc_core_substate_req_regs_fops);
> > }
> > +
> > + if (pmcdev->has_die_c6) {
> > + debugfs_create_file("die_c6_us_show", 0444,
> > + pmcdev->dbgfs_dir, pmcdev,
> > + &pmc_core_die_c6_us_fops);
> > + }
> > }
> >
> > static const struct x86_cpu_id intel_pmc_core_ids[] = {
> > @@ -1212,6 +1261,10 @@ static void pmc_core_clean_structure(struct
> > platform_device *pdev)
> > pci_dev_put(pmcdev->ssram_pcidev);
> > pci_disable_device(pmcdev->ssram_pcidev);
> > }
> > +
> > + if (pmcdev->punit_ep)
...
> > + pmt_telem_unregister_endpoint(pmcdev->punit_ep);
> > +
> > platform_set_drvdata(pdev, NULL);
> > mutex_destroy(&pmcdev->lock);
> > }
> > @@ -1232,6 +1285,8 @@ static int pmc_core_probe(struct platform_device
> > *pdev)
> > if (!pmcdev)
> > return -ENOMEM;
> >
> > + pmcdev->crystal_freq = pmc_core_get_crystal_freq();
> > +
> > platform_set_drvdata(pdev, pmcdev);
> > pmcdev->pdev = pdev;
> >
> > diff --git a/drivers/platform/x86/intel/pmc/core.h
> > b/drivers/platform/x86/intel/pmc/core.h
> > index 85b6f6ae4995..6d7673145f90 100644
> > --- a/drivers/platform/x86/intel/pmc/core.h
> > +++ b/drivers/platform/x86/intel/pmc/core.h
> > @@ -16,6 +16,8 @@
> > #include <linux/bits.h>
> > #include <linux/platform_device.h>
> >
> > +struct telem_endpoint;
> > +
>
> This seems unrelated to the patch.
This was missing when "struct telem_endpoint *punit_ep" was declared in this
header in an earlier patch that went upstream. But punit_ep does not get used
yet until this patch as shown above. Will clarify in the changelog.
David
>
@@ -20,6 +20,7 @@
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/suspend.h>
+#include <linux/units.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
@@ -27,6 +28,7 @@
#include <asm/tsc.h>
#include "core.h"
+#include "../pmt/telemetry.h"
/* Maximum number of modes supported by platfoms that has low power mode capability */
const char *pmc_lpm_modes[] = {
@@ -822,6 +824,47 @@ static int pmc_core_substate_req_regs_show(struct seq_file *s, void *unused)
}
DEFINE_SHOW_ATTRIBUTE(pmc_core_substate_req_regs);
+static unsigned int pmc_core_get_crystal_freq(void)
+{
+ unsigned int eax_denominator, ebx_numerator, ecx_hz, edx;
+
+ if (boot_cpu_data.cpuid_level < 0x15)
+ return 0;
+
+ eax_denominator = ebx_numerator = ecx_hz = edx = 0;
+
+ /* CPUID 15H TSC/Crystal ratio, plus optionally Crystal Hz */
+ cpuid(0x15, &eax_denominator, &ebx_numerator, &ecx_hz, &edx);
+
+ if (ebx_numerator == 0 || eax_denominator == 0)
+ return 0;
+
+ return ecx_hz;
+}
+
+static int pmc_core_die_c6_us_show(struct seq_file *s, void *unused)
+{
+ struct pmc_dev *pmcdev = s->private;
+ u64 die_c6_res, count;
+ int ret;
+
+ if (!pmcdev->crystal_freq) {
+ dev_warn_once(&pmcdev->pdev->dev, "Bad crystal frequency\n");
+ return -EINVAL;
+ }
+
+ ret = pmt_telem_read(pmcdev->punit_ep, pmcdev->die_c6_offset,
+ &count, 1);
+ if (ret)
+ return ret;
+
+ die_c6_res = div64_u64(count * HZ_PER_MHZ, pmcdev->crystal_freq);
+ seq_printf(s, "%llu\n", die_c6_res);
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(pmc_core_die_c6_us);
+
static int pmc_core_lpm_latch_mode_show(struct seq_file *s, void *unused)
{
struct pmc_dev *pmcdev = s->private;
@@ -1118,6 +1161,12 @@ static void pmc_core_dbgfs_register(struct pmc_dev *pmcdev)
pmcdev->dbgfs_dir, pmcdev,
&pmc_core_substate_req_regs_fops);
}
+
+ if (pmcdev->has_die_c6) {
+ debugfs_create_file("die_c6_us_show", 0444,
+ pmcdev->dbgfs_dir, pmcdev,
+ &pmc_core_die_c6_us_fops);
+ }
}
static const struct x86_cpu_id intel_pmc_core_ids[] = {
@@ -1212,6 +1261,10 @@ static void pmc_core_clean_structure(struct platform_device *pdev)
pci_dev_put(pmcdev->ssram_pcidev);
pci_disable_device(pmcdev->ssram_pcidev);
}
+
+ if (pmcdev->punit_ep)
+ pmt_telem_unregister_endpoint(pmcdev->punit_ep);
+
platform_set_drvdata(pdev, NULL);
mutex_destroy(&pmcdev->lock);
}
@@ -1232,6 +1285,8 @@ static int pmc_core_probe(struct platform_device *pdev)
if (!pmcdev)
return -ENOMEM;
+ pmcdev->crystal_freq = pmc_core_get_crystal_freq();
+
platform_set_drvdata(pdev, pmcdev);
pmcdev->pdev = pdev;
@@ -16,6 +16,8 @@
#include <linux/bits.h>
#include <linux/platform_device.h>
+struct telem_endpoint;
+
#define SLP_S0_RES_COUNTER_MASK GENMASK(31, 0)
#define PMC_BASE_ADDR_DEFAULT 0xFE000000
@@ -357,6 +359,7 @@ struct pmc {
* @devs: pointer to an array of pmc pointers
* @pdev: pointer to platform_device struct
* @ssram_pcidev: pointer to pci device struct for the PMC SSRAM
+ * @crystal_freq: crystal frequency from cpuid
* @dbgfs_dir: path to debugfs interface
* @pmc_xram_read_bit: flag to indicate whether PMC XRAM shadow registers
* used to read MPHY PG and PLL status are available
@@ -374,6 +377,7 @@ struct pmc_dev {
struct dentry *dbgfs_dir;
struct platform_device *pdev;
struct pci_dev *ssram_pcidev;
+ unsigned int crystal_freq;
int pmc_xram_read_bit;
struct mutex lock; /* generic mutex lock for PMC Core */