[v9,08/13] x86/resctrl: Add sysfs interface to read mbm_total_bytes_config

Message ID 166990901578.17806.14559688057407662110.stgit@bmoger-ubuntu
State New
Headers
Series Support for AMD QoS new features |

Commit Message

Moger, Babu Dec. 1, 2022, 3:36 p.m. UTC
  The current event configuration can be viewed by the user by reading
the configuration file /sys/fs/resctrl/info/L3_MON/mbm_total_bytes_config.
The event configuration settings are domain specific and will affect all
the CPUs in the domain.

Following are the types of events supported:
====  ===========================================================
Bits   Description
====  ===========================================================
6      Dirty Victims from the QOS domain to all types of memory
5      Reads to slow memory in the non-local NUMA domain
4      Reads to slow memory in the local NUMA domain
3      Non-temporal writes to non-local NUMA domain
2      Non-temporal writes to local NUMA domain
1      Reads to memory in the non-local NUMA domain
0      Reads to memory in the local NUMA domain
====  ===========================================================

By default, the mbm_total_bytes_config is set to 0x7f to count all the
event types.

For example:
    $cat /sys/fs/resctrl/info/L3_MON/mbm_total_bytes_config
    0=0x7f;1=0x7f;2=0x7f;3=0x7f

    In this case, the event mbm_total_bytes is currently configured
    with 0x7f on domains 0 to 3.

Signed-off-by: Babu Moger <babu.moger@amd.com>
---
 arch/x86/include/asm/msr-index.h       |    1 
 arch/x86/kernel/cpu/resctrl/internal.h |   24 ++++++++
 arch/x86/kernel/cpu/resctrl/monitor.c  |    4 +
 arch/x86/kernel/cpu/resctrl/rdtgroup.c |   99 ++++++++++++++++++++++++++++++++
 4 files changed, 127 insertions(+), 1 deletion(-)
  

Comments

Reinette Chatre Dec. 15, 2022, 5:40 p.m. UTC | #1
Hi Babu,

I would like to second James's suggestion to replace sysfs with resctrl
or just remove it. I am concerned that you mentioned in recent message
that you only plan changes to patch 10 while James highlighted that this
needs to be addressed in entire series. Could you please ensure that
you check all the patches?

On 12/1/2022 7:36 AM, Babu Moger wrote:
> The current event configuration can be viewed by the user by reading

What "current" means is not clear and the term could just be removed.

> the configuration file /sys/fs/resctrl/info/L3_MON/mbm_total_bytes_config.
> The event configuration settings are domain specific and will affect all
> the CPUs in the domain.
> 
> Following are the types of events supported:
> ====  ===========================================================
> Bits   Description
> ====  ===========================================================
> 6      Dirty Victims from the QOS domain to all types of memory
> 5      Reads to slow memory in the non-local NUMA domain
> 4      Reads to slow memory in the local NUMA domain
> 3      Non-temporal writes to non-local NUMA domain
> 2      Non-temporal writes to local NUMA domain
> 1      Reads to memory in the non-local NUMA domain
> 0      Reads to memory in the local NUMA domain
> ====  ===========================================================
> 
> By default, the mbm_total_bytes_config is set to 0x7f to count all the
> event types.
> 
> For example:
>     $cat /sys/fs/resctrl/info/L3_MON/mbm_total_bytes_config
>     0=0x7f;1=0x7f;2=0x7f;3=0x7f
> 
>     In this case, the event mbm_total_bytes is currently configured
>     with 0x7f on domains 0 to 3.

"currently" can be removed since it already starts with "In this case".


> Signed-off-by: Babu Moger <babu.moger@amd.com>
> ---
>  arch/x86/include/asm/msr-index.h       |    1 
>  arch/x86/kernel/cpu/resctrl/internal.h |   24 ++++++++
>  arch/x86/kernel/cpu/resctrl/monitor.c  |    4 +
>  arch/x86/kernel/cpu/resctrl/rdtgroup.c |   99 ++++++++++++++++++++++++++++++++
>  4 files changed, 127 insertions(+), 1 deletion(-)
> 

...

> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> index 8342feb54a7f..e93b1c206116 100644
> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> @@ -1423,6 +1423,90 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
>  	return ret;
>  }
>  
> +struct mon_config_info {
> +	u32 evtid;
> +	u32 mon_config;
> +};
> +
> +#define INVALID_CONFIG_INDEX   UINT_MAX
> +
> +/**
> + * mon_event_config_index_get - get the index for the configurable event

Could you say "get the hardware index" to help clarify what the index is
for?

> + * @evtid: event id.
> + *
> + * Return: 0 for evtid == QOS_L3_MBM_TOTAL_EVENT_ID
> + *         1 for evtid == QOS_L3_MBM_LOCAL_EVENT_ID
> + *         INVALID_CONFIG_INDEX for invalid evtid
> + */
> +static inline unsigned int mon_event_config_index_get(u32 evtid)
> +{
> +	switch (evtid) {
> +	case QOS_L3_MBM_TOTAL_EVENT_ID:
> +		return 0;
> +	case QOS_L3_MBM_LOCAL_EVENT_ID:
> +		return 1;
> +	default:
> +		/* WARN */
> +		return INVALID_CONFIG_INDEX;
> +	}
> +}

I see that you copied my sample code here. My intention was that the
/* WARN */ comment be replaced with an actual warning. As a comment
it does not add value. Since the caller now prints a subtler warning it
could just be:

	/* Should never reach here */
	return INVALID_CONFIG_INDEX;

> +
> +static void mon_event_config_read(void *info)
> +{
> +	struct mon_config_info *mon_info = info;
> +	u32 h, index;

index can be "unsigned int" as returned by mon_event_config_index_get()

> +
> +	index = mon_event_config_index_get(mon_info->evtid);
> +	if (index == INVALID_CONFIG_INDEX) {
> +		pr_warn_once("Invalid event id %d\n", mon_info->evtid);
> +		return;
> +	}
> +	rdmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, h);
> +
> +	/* Report only the valid event configuration bits */
> +	mon_info->mon_config &= MAX_EVT_CONFIG_BITS;
> +}
> +
> +static void mondata_config_read(struct rdt_domain *d, struct mon_config_info *mon_info)
> +{
> +	smp_call_function_any(&d->cpu_mask, mon_event_config_read, mon_info, 1);
> +}
> +
> +static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid)
> +{
> +	struct mon_config_info mon_info = {0};
> +	struct rdt_domain *dom;
> +	bool sep = false;
> +
> +	mutex_lock(&rdtgroup_mutex);
> +
> +	list_for_each_entry(dom, &r->domains, list) {
> +		if (sep)
> +			seq_puts(s, ";");
> +
> +		mon_info.evtid = evtid;
> +		mondata_config_read(dom, &mon_info);
> +

For robustness, please reset mon_config before calling mondata_config_read(). Since
mon_event_config_read() may (yes this is very unlikely) exit early then mon_config
will contain the data from the previous domain.

> +		seq_printf(s, "%d=0x%02x", dom->id, mon_info.mon_config);
> +		sep = true;
> +	}
> +	seq_puts(s, "\n");
> +
> +	mutex_unlock(&rdtgroup_mutex);
> +
> +	return 0;
> +}
> +

...

Reinette
  
Moger, Babu Dec. 19, 2022, 6:21 p.m. UTC | #2
[AMD Official Use Only - General]

Hi Reinette,

> -----Original Message-----
> From: Reinette Chatre <reinette.chatre@intel.com>
> Sent: Thursday, December 15, 2022 11:41 AM
> To: Moger, Babu <Babu.Moger@amd.com>; corbet@lwn.net;
> tglx@linutronix.de; mingo@redhat.com; bp@alien8.de
> Cc: fenghua.yu@intel.com; dave.hansen@linux.intel.com; x86@kernel.org;
> hpa@zytor.com; paulmck@kernel.org; akpm@linux-foundation.org;
> quic_neeraju@quicinc.com; rdunlap@infradead.org;
> damien.lemoal@opensource.wdc.com; songmuchun@bytedance.com;
> peterz@infradead.org; jpoimboe@kernel.org; pbonzini@redhat.com;
> chang.seok.bae@intel.com; pawan.kumar.gupta@linux.intel.com;
> jmattson@google.com; daniel.sneddon@linux.intel.com; Das1, Sandipan
> <Sandipan.Das@amd.com>; tony.luck@intel.com; james.morse@arm.com;
> linux-doc@vger.kernel.org; linux-kernel@vger.kernel.org;
> bagasdotme@gmail.com; eranian@google.com; christophe.leroy@csgroup.eu;
> jarkko@kernel.org; adrian.hunter@intel.com; quic_jiles@quicinc.com;
> peternewman@google.com
> Subject: Re: [PATCH v9 08/13] x86/resctrl: Add sysfs interface to read
> mbm_total_bytes_config
> 
> Hi Babu,
> 
> I would like to second James's suggestion to replace sysfs with resctrl or just
> remove it. I am concerned that you mentioned in recent message that you only
> plan changes to patch 10 while James highlighted that this needs to be
> addressed in entire series. Could you please ensure that you check all the
> patches?

Ok. Sure. Will remove it.
x86/resctrl: Add interface to write mbm_total_bytes_config

Will check other patches for similar changes.
 
> 
> On 12/1/2022 7:36 AM, Babu Moger wrote:
> > The current event configuration can be viewed by the user by reading
> 
> What "current" means is not clear and the term could just be removed.

Will remove it.
> 
> > the configuration file /sys/fs/resctrl/info/L3_MON/mbm_total_bytes_config.
> > The event configuration settings are domain specific and will affect
> > all the CPUs in the domain.
> >
> > Following are the types of events supported:
> > ====
> ===========================================================
> > Bits   Description
> > ====
> ===========================================================
> > 6      Dirty Victims from the QOS domain to all types of memory
> > 5      Reads to slow memory in the non-local NUMA domain
> > 4      Reads to slow memory in the local NUMA domain
> > 3      Non-temporal writes to non-local NUMA domain
> > 2      Non-temporal writes to local NUMA domain
> > 1      Reads to memory in the non-local NUMA domain
> > 0      Reads to memory in the local NUMA domain
> > ====
> ===========================================================
> >
> > By default, the mbm_total_bytes_config is set to 0x7f to count all the
> > event types.
> >
> > For example:
> >     $cat /sys/fs/resctrl/info/L3_MON/mbm_total_bytes_config
> >     0=0x7f;1=0x7f;2=0x7f;3=0x7f
> >
> >     In this case, the event mbm_total_bytes is currently configured
> >     with 0x7f on domains 0 to 3.
> 
> "currently" can be removed since it already starts with "In this case".

Sure.
> 
> 
> > Signed-off-by: Babu Moger <babu.moger@amd.com>
> > ---
> >  arch/x86/include/asm/msr-index.h       |    1
> >  arch/x86/kernel/cpu/resctrl/internal.h |   24 ++++++++
> >  arch/x86/kernel/cpu/resctrl/monitor.c  |    4 +
> >  arch/x86/kernel/cpu/resctrl/rdtgroup.c |   99
> ++++++++++++++++++++++++++++++++
> >  4 files changed, 127 insertions(+), 1 deletion(-)
> >
> 
> ...
> 
> > diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> > b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> > index 8342feb54a7f..e93b1c206116 100644
> > --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> > +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> > @@ -1423,6 +1423,90 @@ static int rdtgroup_size_show(struct
> kernfs_open_file *of,
> >  	return ret;
> >  }
> >
> > +struct mon_config_info {
> > +	u32 evtid;
> > +	u32 mon_config;
> > +};
> > +
> > +#define INVALID_CONFIG_INDEX   UINT_MAX
> > +
> > +/**
> > + * mon_event_config_index_get - get the index for the configurable
> > +event
> 
> Could you say "get the hardware index" to help clarify what the index is for?

Sure.

> 
> > + * @evtid: event id.
> > + *
> > + * Return: 0 for evtid == QOS_L3_MBM_TOTAL_EVENT_ID
> > + *         1 for evtid == QOS_L3_MBM_LOCAL_EVENT_ID
> > + *         INVALID_CONFIG_INDEX for invalid evtid
> > + */
> > +static inline unsigned int mon_event_config_index_get(u32 evtid) {
> > +	switch (evtid) {
> > +	case QOS_L3_MBM_TOTAL_EVENT_ID:
> > +		return 0;
> > +	case QOS_L3_MBM_LOCAL_EVENT_ID:
> > +		return 1;
> > +	default:
> > +		/* WARN */
> > +		return INVALID_CONFIG_INDEX;
> > +	}
> > +}
> 
> I see that you copied my sample code here. My intention was that the
> /* WARN */ comment be replaced with an actual warning. As a comment it
> does not add value. Since the caller now prints a subtler warning it could just
> be:
> 
> 	/* Should never reach here */
> 	return INVALID_CONFIG_INDEX;

Sure.
> 
> > +
> > +static void mon_event_config_read(void *info) {
> > +	struct mon_config_info *mon_info = info;
> > +	u32 h, index;
> 
> index can be "unsigned int" as returned by mon_event_config_index_get()

Ok. Sure

> 
> > +
> > +	index = mon_event_config_index_get(mon_info->evtid);
> > +	if (index == INVALID_CONFIG_INDEX) {
> > +		pr_warn_once("Invalid event id %d\n", mon_info->evtid);
> > +		return;
> > +	}
> > +	rdmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, h);
> > +
> > +	/* Report only the valid event configuration bits */
> > +	mon_info->mon_config &= MAX_EVT_CONFIG_BITS; }
> > +
> > +static void mondata_config_read(struct rdt_domain *d, struct
> > +mon_config_info *mon_info) {
> > +	smp_call_function_any(&d->cpu_mask, mon_event_config_read,
> mon_info,
> > +1); }
> > +
> > +static int mbm_config_show(struct seq_file *s, struct rdt_resource
> > +*r, u32 evtid) {
> > +	struct mon_config_info mon_info = {0};
> > +	struct rdt_domain *dom;
> > +	bool sep = false;
> > +
> > +	mutex_lock(&rdtgroup_mutex);
> > +
> > +	list_for_each_entry(dom, &r->domains, list) {
> > +		if (sep)
> > +			seq_puts(s, ";");
> > +
> > +		mon_info.evtid = evtid;
> > +		mondata_config_read(dom, &mon_info);
> > +
> 
> For robustness, please reset mon_config before calling mondata_config_read().
> Since
> mon_event_config_read() may (yes this is very unlikely) exit early then
> mon_config will contain the data from the previous domain.

Sure. I can call memset to reset.
Thanks
Babu

> 
> > +		seq_printf(s, "%d=0x%02x", dom->id, mon_info.mon_config);
> > +		sep = true;
> > +	}
> > +	seq_puts(s, "\n");
> > +
> > +	mutex_unlock(&rdtgroup_mutex);
> > +
> > +	return 0;
> > +}
> > +
> 
> ...
> 
> Reinette
  

Patch

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index e0a40027aa62..2e5f57c93605 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -1062,6 +1062,7 @@ 
 /* - AMD: */
 #define MSR_IA32_MBA_BW_BASE		0xc0000200
 #define MSR_IA32_SMBA_BW_BASE		0xc0000280
+#define MSR_IA32_EVT_CFG_BASE		0xc0000400
 
 /* MSR_IA32_VMX_MISC bits */
 #define MSR_IA32_VMX_MISC_INTEL_PT                 (1ULL << 14)
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index b36750334deb..f7fc69e82e8b 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -30,6 +30,29 @@ 
  */
 #define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE)
 
+/* Reads to Local DRAM Memory */
+#define READS_TO_LOCAL_MEM		BIT(0)
+
+/* Reads to Remote DRAM Memory */
+#define READS_TO_REMOTE_MEM		BIT(1)
+
+/* Non-Temporal Writes to Local Memory */
+#define NON_TEMP_WRITE_TO_LOCAL_MEM	BIT(2)
+
+/* Non-Temporal Writes to Remote Memory */
+#define NON_TEMP_WRITE_TO_REMOTE_MEM	BIT(3)
+
+/* Reads to Local Memory the system identifies as "Slow Memory" */
+#define READS_TO_LOCAL_S_MEM		BIT(4)
+
+/* Reads to Remote Memory the system identifies as "Slow Memory" */
+#define READS_TO_REMOTE_S_MEM		BIT(5)
+
+/* Dirty Victims to All Types of Memory */
+#define  DIRTY_VICTIMS_TO_ALL_MEM	BIT(6)
+
+/* Max event bits supported */
+#define MAX_EVT_CONFIG_BITS		GENMASK(6, 0)
 
 struct rdt_fs_context {
 	struct kernfs_fs_context	kfc;
@@ -531,5 +554,6 @@  bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
 void __check_limbo(struct rdt_domain *d, bool force_free);
 void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
 void __init thread_throttle_mode_init(void);
+void __init mbm_config_rftype_init(const char *config);
 
 #endif /* _ASM_X86_RESCTRL_INTERNAL_H */
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index b39e0eca1879..2afddebc8636 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -784,8 +784,10 @@  int __init rdt_get_mon_l3_config(struct rdt_resource *r)
 		return ret;
 
 	if (rdt_cpu_has(X86_FEATURE_BMEC)) {
-		if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL))
+		if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) {
 			mbm_total_event.configurable = true;
+			mbm_config_rftype_init("mbm_total_bytes_config");
+		}
 		if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))
 			mbm_local_event.configurable = true;
 	}
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 8342feb54a7f..e93b1c206116 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -1423,6 +1423,90 @@  static int rdtgroup_size_show(struct kernfs_open_file *of,
 	return ret;
 }
 
+struct mon_config_info {
+	u32 evtid;
+	u32 mon_config;
+};
+
+#define INVALID_CONFIG_INDEX   UINT_MAX
+
+/**
+ * mon_event_config_index_get - get the index for the configurable event
+ * @evtid: event id.
+ *
+ * Return: 0 for evtid == QOS_L3_MBM_TOTAL_EVENT_ID
+ *         1 for evtid == QOS_L3_MBM_LOCAL_EVENT_ID
+ *         INVALID_CONFIG_INDEX for invalid evtid
+ */
+static inline unsigned int mon_event_config_index_get(u32 evtid)
+{
+	switch (evtid) {
+	case QOS_L3_MBM_TOTAL_EVENT_ID:
+		return 0;
+	case QOS_L3_MBM_LOCAL_EVENT_ID:
+		return 1;
+	default:
+		/* WARN */
+		return INVALID_CONFIG_INDEX;
+	}
+}
+
+static void mon_event_config_read(void *info)
+{
+	struct mon_config_info *mon_info = info;
+	u32 h, index;
+
+	index = mon_event_config_index_get(mon_info->evtid);
+	if (index == INVALID_CONFIG_INDEX) {
+		pr_warn_once("Invalid event id %d\n", mon_info->evtid);
+		return;
+	}
+	rdmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, h);
+
+	/* Report only the valid event configuration bits */
+	mon_info->mon_config &= MAX_EVT_CONFIG_BITS;
+}
+
+static void mondata_config_read(struct rdt_domain *d, struct mon_config_info *mon_info)
+{
+	smp_call_function_any(&d->cpu_mask, mon_event_config_read, mon_info, 1);
+}
+
+static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid)
+{
+	struct mon_config_info mon_info = {0};
+	struct rdt_domain *dom;
+	bool sep = false;
+
+	mutex_lock(&rdtgroup_mutex);
+
+	list_for_each_entry(dom, &r->domains, list) {
+		if (sep)
+			seq_puts(s, ";");
+
+		mon_info.evtid = evtid;
+		mondata_config_read(dom, &mon_info);
+
+		seq_printf(s, "%d=0x%02x", dom->id, mon_info.mon_config);
+		sep = true;
+	}
+	seq_puts(s, "\n");
+
+	mutex_unlock(&rdtgroup_mutex);
+
+	return 0;
+}
+
+static int mbm_total_bytes_config_show(struct kernfs_open_file *of,
+				       struct seq_file *seq, void *v)
+{
+	struct rdt_resource *r = of->kn->parent->priv;
+
+	mbm_config_show(seq, r, QOS_L3_MBM_TOTAL_EVENT_ID);
+
+	return 0;
+}
+
 /* rdtgroup information files for one cache resource. */
 static struct rftype res_common_files[] = {
 	{
@@ -1521,6 +1605,12 @@  static struct rftype res_common_files[] = {
 		.seq_show	= max_threshold_occ_show,
 		.fflags		= RF_MON_INFO | RFTYPE_RES_CACHE,
 	},
+	{
+		.name		= "mbm_total_bytes_config",
+		.mode		= 0444,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.seq_show	= mbm_total_bytes_config_show,
+	},
 	{
 		.name		= "cpus",
 		.mode		= 0644,
@@ -1627,6 +1717,15 @@  void __init thread_throttle_mode_init(void)
 	rft->fflags = RF_CTRL_INFO | RFTYPE_RES_MB;
 }
 
+void __init mbm_config_rftype_init(const char *config)
+{
+	struct rftype *rft;
+
+	rft = rdtgroup_get_rftype_by_name(config);
+	if (rft)
+		rft->fflags = RF_MON_INFO | RFTYPE_RES_CACHE;
+}
+
 /**
  * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file
  * @r: The resource group with which the file is associated.