[v2,2/2] x86/resctrl: Remove hard-coded memory bandwidth event configuration

Message ID 170240414535.760665.1609957728181418569.stgit@bmoger-ubuntu
State New
Headers
Series None |

Commit Message

Moger, Babu Dec. 12, 2023, 6:02 p.m. UTC
  If the BMEC (Bandwidth Monitoring Event Configuration) feature is
supported, the bandwidth events can be configured. The maximum supported
bandwidth bitmask can be determined by following CPUID command.

CPUID_Fn80000020_ECX_x03 [Platform QoS Monitoring Bandwidth Event
Configuration] Read-only. Reset: 0000_007Fh.
Bits	Description
31:7	Reserved
 6:0	Identifies the bandwidth sources that can be tracked.

The bandwidth sources can change with the processor generations.
Currently, this information is hard-coded. Remove the hard-coded value
and detect using CPUID command. Also print the valid bitmask when the
user tries to configure invalid value.

The CPUID details are documentation in the PPR listed below [1].
[1] Processor Programming Reference (PPR) Vol 1.1 for AMD Family 19h Model
11h B1 - 55901 Rev 0.25.

Fixes: dc2a3e857981 ("x86/resctrl: Add interface to read mbm_total_bytes_config")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537
Signed-off-by: Babu Moger <babu.moger@amd.com>

---
v2: Earlier Sent as a part of ABMC feature.
    https://lore.kernel.org/lkml/20231201005720.235639-1-babu.moger@amd.com/
    But this is not related to ABMC. Sending it separate now.
    Removed the global resctrl_max_evt_bitmask. Added event_mask as part of
    the resource.
---
 arch/x86/kernel/cpu/resctrl/internal.h |    5 ++---
 arch/x86/kernel/cpu/resctrl/monitor.c  |    6 ++++++
 arch/x86/kernel/cpu/resctrl/rdtgroup.c |   18 ++++++++++--------
 3 files changed, 18 insertions(+), 11 deletions(-)
  

Comments

Reinette Chatre Dec. 15, 2023, 1:24 a.m. UTC | #1
Hi Babu,

On 12/12/2023 10:02 AM, Babu Moger wrote:
> If the BMEC (Bandwidth Monitoring Event Configuration) feature is
> supported, the bandwidth events can be configured. The maximum supported
> bandwidth bitmask can be determined by following CPUID command.
> 
> CPUID_Fn80000020_ECX_x03 [Platform QoS Monitoring Bandwidth Event
> Configuration] Read-only. Reset: 0000_007Fh.
> Bits	Description
> 31:7	Reserved
>  6:0	Identifies the bandwidth sources that can be tracked.
> 
> The bandwidth sources can change with the processor generations.
> Currently, this information is hard-coded. Remove the hard-coded value
> and detect using CPUID command. Also print the valid bitmask when the
> user tries to configure invalid value.
> 
> The CPUID details are documentation in the PPR listed below [1].
> [1] Processor Programming Reference (PPR) Vol 1.1 for AMD Family 19h Model
> 11h B1 - 55901 Rev 0.25.
> 
> Fixes: dc2a3e857981 ("x86/resctrl: Add interface to read mbm_total_bytes_config")
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537
> Signed-off-by: Babu Moger <babu.moger@amd.com>
> 
> ---
> v2: Earlier Sent as a part of ABMC feature.
>     https://lore.kernel.org/lkml/20231201005720.235639-1-babu.moger@amd.com/
>     But this is not related to ABMC. Sending it separate now.
>     Removed the global resctrl_max_evt_bitmask. Added event_mask as part of
>     the resource.
> ---
>  arch/x86/kernel/cpu/resctrl/internal.h |    5 ++---
>  arch/x86/kernel/cpu/resctrl/monitor.c  |    6 ++++++
>  arch/x86/kernel/cpu/resctrl/rdtgroup.c |   18 ++++++++++--------
>  3 files changed, 18 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index d2979748fae4..3e2f505614d8 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -50,9 +50,6 @@
>  /* Dirty Victims to All Types of Memory */
>  #define DIRTY_VICTIMS_TO_ALL_MEM	BIT(6)
>  
> -/* Max event bits supported */
> -#define MAX_EVT_CONFIG_BITS		GENMASK(6, 0)
> -
>  struct rdt_fs_context {
>  	struct kernfs_fs_context	kfc;
>  	bool				enable_cdpl2;
> @@ -394,6 +391,7 @@ struct rdt_parse_data {
>   * @msr_update:		Function pointer to update QOS MSRs
>   * @mon_scale:		cqm counter * mon_scale = occupancy in bytes
>   * @mbm_width:		Monitor width, to detect and correct for overflow.
> + * @event_mask:		Max supported event bitmask.

This is a very generic name and description for this feature. Note that in
resctrl monitoring an "event" is already clear (see members of enum resctrl_event_id)
so a generic type of "event_mask" can easily cause confusion with existing
concept of events. How about "mbm_cfg_mask"? Please also make the description
more detailed - it could include that this is unique to BMEC. 

>   * @cdp_enabled:	CDP state of this resource
>   *
>   * Members of this structure are either private to the architecture
> @@ -408,6 +406,7 @@ struct rdt_hw_resource {
>  				 struct rdt_resource *r);
>  	unsigned int		mon_scale;
>  	unsigned int		mbm_width;
> +	unsigned int		event_mask;
>  	bool			cdp_enabled;
>  };
>  
> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> index f136ac046851..30bf919edfda 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -813,6 +813,12 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r)
>  		return ret;
>  
>  	if (rdt_cpu_has(X86_FEATURE_BMEC)) {
> +		u32 eax, ebx, ecx, edx;
> +
> +		/* Detect list of bandwidth sources that can be tracked */
> +		cpuid_count(0x80000020, 3, &eax, &ebx, &ecx, &edx);
> +		hw_res->event_mask = ecx;
> +

This has the same issue as I mentioned in V1. Note that this treats
reserved bits as valid values. I think this is a risky thing to do. For example
when this code is run on future hardware the currently reserved bits may have
values with different meaning than what this code uses it for.

>  		if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) {
>  			mbm_total_event.configurable = true;
>  			mbm_config_rftype_init("mbm_total_bytes_config");
> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> index 69a1de92384a..8a1e9fdab974 100644
> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> @@ -1537,17 +1537,14 @@ static void mon_event_config_read(void *info)
>  {
>  	struct mon_config_info *mon_info = info;
>  	unsigned int index;
> -	u64 msrval;
> +	u32 h;
>  
>  	index = mon_event_config_index_get(mon_info->evtid);
>  	if (index == INVALID_CONFIG_INDEX) {
>  		pr_warn_once("Invalid event id %d\n", mon_info->evtid);
>  		return;
>  	}
> -	rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval);
> -
> -	/* Report only the valid event configuration bits */
> -	mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS;
> +	rdmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, h);

I do not think this code needed to be changed. We do not want to treat
reserved bits as valid values. 

>  }
>  
>  static void mondata_config_read(struct rdt_domain *d, struct mon_config_info *mon_info)
> @@ -1557,6 +1554,7 @@ static void mondata_config_read(struct rdt_domain *d, struct mon_config_info *mo
>  
>  static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid)
>  {
> +	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
>  	struct mon_config_info mon_info = {0};
>  	struct rdt_domain *dom;
>  	bool sep = false;
> @@ -1571,7 +1569,9 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid
>  		mon_info.evtid = evtid;
>  		mondata_config_read(dom, &mon_info);
>  
> -		seq_printf(s, "%d=0x%02x", dom->id, mon_info.mon_config);
> +		/* Report only the valid event configuration bits */
> +		seq_printf(s, "%d=0x%02x", dom->id,
> +			   mon_info.mon_config & hw_res->event_mask);
>  		sep = true;
>  	}
>  	seq_puts(s, "\n");
> @@ -1617,12 +1617,14 @@ static void mon_event_config_write(void *info)
>  static int mbm_config_write_domain(struct rdt_resource *r,
>  				   struct rdt_domain *d, u32 evtid, u32 val)
>  {
> +	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
>  	struct mon_config_info mon_info = {0};
>  	int ret = 0;
>  
>  	/* mon_config cannot be more than the supported set of events */
> -	if (val > MAX_EVT_CONFIG_BITS) {
> -		rdt_last_cmd_puts("Invalid event configuration\n");
> +	if ((val & hw_res->event_mask) != val) {
> +		rdt_last_cmd_printf("Invalid input: The maximum valid bitmask is 0x%02x\n",
> +				    hw_res->event_mask);
>  		return -EINVAL;
>  	}
>  
> 
> 

Reinette
  
Moger, Babu Jan. 2, 2024, 8 p.m. UTC | #2
Hi Reinette,

Sorry for late response. I was out of office for couple of weeks.

On 12/14/23 19:24, Reinette Chatre wrote:
> Hi Babu,
> 
> On 12/12/2023 10:02 AM, Babu Moger wrote:
>> If the BMEC (Bandwidth Monitoring Event Configuration) feature is
>> supported, the bandwidth events can be configured. The maximum supported
>> bandwidth bitmask can be determined by following CPUID command.
>>
>> CPUID_Fn80000020_ECX_x03 [Platform QoS Monitoring Bandwidth Event
>> Configuration] Read-only. Reset: 0000_007Fh.
>> Bits	Description
>> 31:7	Reserved
>>  6:0	Identifies the bandwidth sources that can be tracked.
>>
>> The bandwidth sources can change with the processor generations.
>> Currently, this information is hard-coded. Remove the hard-coded value
>> and detect using CPUID command. Also print the valid bitmask when the
>> user tries to configure invalid value.
>>
>> The CPUID details are documentation in the PPR listed below [1].
>> [1] Processor Programming Reference (PPR) Vol 1.1 for AMD Family 19h Model
>> 11h B1 - 55901 Rev 0.25.
>>
>> Fixes: dc2a3e857981 ("x86/resctrl: Add interface to read mbm_total_bytes_config")
>> Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537
>> Signed-off-by: Babu Moger <babu.moger@amd.com>
>>
>> ---
>> v2: Earlier Sent as a part of ABMC feature.
>>     https://lore.kernel.org/lkml/20231201005720.235639-1-babu.moger@amd.com/
>>     But this is not related to ABMC. Sending it separate now.
>>     Removed the global resctrl_max_evt_bitmask. Added event_mask as part of
>>     the resource.
>> ---
>>  arch/x86/kernel/cpu/resctrl/internal.h |    5 ++---
>>  arch/x86/kernel/cpu/resctrl/monitor.c  |    6 ++++++
>>  arch/x86/kernel/cpu/resctrl/rdtgroup.c |   18 ++++++++++--------
>>  3 files changed, 18 insertions(+), 11 deletions(-)
>>
>> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
>> index d2979748fae4..3e2f505614d8 100644
>> --- a/arch/x86/kernel/cpu/resctrl/internal.h
>> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
>> @@ -50,9 +50,6 @@
>>  /* Dirty Victims to All Types of Memory */
>>  #define DIRTY_VICTIMS_TO_ALL_MEM	BIT(6)
>>  
>> -/* Max event bits supported */
>> -#define MAX_EVT_CONFIG_BITS		GENMASK(6, 0)
>> -
>>  struct rdt_fs_context {
>>  	struct kernfs_fs_context	kfc;
>>  	bool				enable_cdpl2;
>> @@ -394,6 +391,7 @@ struct rdt_parse_data {
>>   * @msr_update:		Function pointer to update QOS MSRs
>>   * @mon_scale:		cqm counter * mon_scale = occupancy in bytes
>>   * @mbm_width:		Monitor width, to detect and correct for overflow.
>> + * @event_mask:		Max supported event bitmask.
> 
> This is a very generic name and description for this feature. Note that in
> resctrl monitoring an "event" is already clear (see members of enum resctrl_event_id)
> so a generic type of "event_mask" can easily cause confusion with existing
> concept of events. How about "mbm_cfg_mask"? Please also make the description

That should be fine.

> more detailed - it could include that this is unique to BMEC. 

Sure.

> 
>>   * @cdp_enabled:	CDP state of this resource
>>   *
>>   * Members of this structure are either private to the architecture
>> @@ -408,6 +406,7 @@ struct rdt_hw_resource {
>>  				 struct rdt_resource *r);
>>  	unsigned int		mon_scale;
>>  	unsigned int		mbm_width;
>> +	unsigned int		event_mask;
>>  	bool			cdp_enabled;
>>  };
>>  
>> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
>> index f136ac046851..30bf919edfda 100644
>> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
>> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
>> @@ -813,6 +813,12 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r)
>>  		return ret;
>>  
>>  	if (rdt_cpu_has(X86_FEATURE_BMEC)) {
>> +		u32 eax, ebx, ecx, edx;
>> +
>> +		/* Detect list of bandwidth sources that can be tracked */
>> +		cpuid_count(0x80000020, 3, &eax, &ebx, &ecx, &edx);
>> +		hw_res->event_mask = ecx;
>> +
> 
> This has the same issue as I mentioned in V1. Note that this treats
> reserved bits as valid values. I think this is a risky thing to do. For example
> when this code is run on future hardware the currently reserved bits may have
> values with different meaning than what this code uses it for.

Sure. Will use the mask MAX_EVT_CONFIG_BITS.
              hw_res->mbm_cfg_mask = ecx &  MAX_EVT_CONFIG_BITS;

> 
>>  		if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) {
>>  			mbm_total_event.configurable = true;
>>  			mbm_config_rftype_init("mbm_total_bytes_config");
>> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>> index 69a1de92384a..8a1e9fdab974 100644
>> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>> @@ -1537,17 +1537,14 @@ static void mon_event_config_read(void *info)
>>  {
>>  	struct mon_config_info *mon_info = info;
>>  	unsigned int index;
>> -	u64 msrval;
>> +	u32 h;
>>  
>>  	index = mon_event_config_index_get(mon_info->evtid);
>>  	if (index == INVALID_CONFIG_INDEX) {
>>  		pr_warn_once("Invalid event id %d\n", mon_info->evtid);
>>  		return;
>>  	}
>> -	rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval);
>> -
>> -	/* Report only the valid event configuration bits */
>> -	mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS;
>> +	rdmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, h);
> 
> I do not think this code needed to be changed. We do not want to treat
> reserved bits as valid values. 

The logic is still the same. We don't have access to rdt_hw_resource in
this function. So, I just moved the masking to mbm_config_show while printing.

Thanks
Babu

> 
>>  }
>>  
>>  static void mondata_config_read(struct rdt_domain *d, struct mon_config_info *mon_info)
>> @@ -1557,6 +1554,7 @@ static void mondata_config_read(struct rdt_domain *d, struct mon_config_info *mo
>>  
>>  static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid)
>>  {
>> +	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
>>  	struct mon_config_info mon_info = {0};
>>  	struct rdt_domain *dom;
>>  	bool sep = false;
>> @@ -1571,7 +1569,9 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid
>>  		mon_info.evtid = evtid;
>>  		mondata_config_read(dom, &mon_info);
>>  
>> -		seq_printf(s, "%d=0x%02x", dom->id, mon_info.mon_config);
>> +		/* Report only the valid event configuration bits */
>> +		seq_printf(s, "%d=0x%02x", dom->id,
>> +			   mon_info.mon_config & hw_res->event_mask);
>>  		sep = true;
>>  	}
>>  	seq_puts(s, "\n");
>> @@ -1617,12 +1617,14 @@ static void mon_event_config_write(void *info)
>>  static int mbm_config_write_domain(struct rdt_resource *r,
>>  				   struct rdt_domain *d, u32 evtid, u32 val)
>>  {
>> +	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
>>  	struct mon_config_info mon_info = {0};
>>  	int ret = 0;
>>  
>>  	/* mon_config cannot be more than the supported set of events */
>> -	if (val > MAX_EVT_CONFIG_BITS) {
>> -		rdt_last_cmd_puts("Invalid event configuration\n");
>> +	if ((val & hw_res->event_mask) != val) {
>> +		rdt_last_cmd_printf("Invalid input: The maximum valid bitmask is 0x%02x\n",
>> +				    hw_res->event_mask);
>>  		return -EINVAL;
>>  	}
>>  
>>
>>
> 
> Reinette
  
Reinette Chatre Jan. 3, 2024, 6:38 p.m. UTC | #3
Hi Babu,

On 1/2/2024 12:00 PM, Moger, Babu wrote:
> On 12/14/23 19:24, Reinette Chatre wrote:
>> On 12/12/2023 10:02 AM, Babu Moger wrote:

>>> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
>>> index f136ac046851..30bf919edfda 100644
>>> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
>>> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
>>> @@ -813,6 +813,12 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r)
>>>  		return ret;
>>>  
>>>  	if (rdt_cpu_has(X86_FEATURE_BMEC)) {
>>> +		u32 eax, ebx, ecx, edx;
>>> +
>>> +		/* Detect list of bandwidth sources that can be tracked */
>>> +		cpuid_count(0x80000020, 3, &eax, &ebx, &ecx, &edx);
>>> +		hw_res->event_mask = ecx;
>>> +
>>
>> This has the same issue as I mentioned in V1. Note that this treats
>> reserved bits as valid values. I think this is a risky thing to do. For example
>> when this code is run on future hardware the currently reserved bits may have
>> values with different meaning than what this code uses it for.
> 
> Sure. Will use the mask MAX_EVT_CONFIG_BITS.
>               hw_res->mbm_cfg_mask = ecx &  MAX_EVT_CONFIG_BITS;
> 
>>
>>>  		if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) {
>>>  			mbm_total_event.configurable = true;
>>>  			mbm_config_rftype_init("mbm_total_bytes_config");
>>> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>>> index 69a1de92384a..8a1e9fdab974 100644
>>> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>>> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>>> @@ -1537,17 +1537,14 @@ static void mon_event_config_read(void *info)
>>>  {
>>>  	struct mon_config_info *mon_info = info;
>>>  	unsigned int index;
>>> -	u64 msrval;
>>> +	u32 h;
>>>  
>>>  	index = mon_event_config_index_get(mon_info->evtid);
>>>  	if (index == INVALID_CONFIG_INDEX) {
>>>  		pr_warn_once("Invalid event id %d\n", mon_info->evtid);
>>>  		return;
>>>  	}
>>> -	rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval);
>>> -
>>> -	/* Report only the valid event configuration bits */
>>> -	mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS;
>>> +	rdmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, h);
>>
>> I do not think this code needed to be changed. We do not want to treat
>> reserved bits as valid values. 
> 
> The logic is still the same. We don't have access to rdt_hw_resource in
> this function. So, I just moved the masking to mbm_config_show while printing.

Why do you need access to rdt_hw_resource? This comment is not about the bandwidth
events supported by the device but instead the bits used to represent these events.
This is the same issue as in rdt_get_mon_l3_config. The above change returns
reserved bits as valid while the original code ensured that only bits used for
field are returned (through the usage of MAX_EVT_CONFIG_BITS).

Reinette
  
Moger, Babu Jan. 3, 2024, 9:03 p.m. UTC | #4
Hi Reinette,

On 1/3/24 12:38, Reinette Chatre wrote:
> Hi Babu,
> 
> On 1/2/2024 12:00 PM, Moger, Babu wrote:
>> On 12/14/23 19:24, Reinette Chatre wrote:
>>> On 12/12/2023 10:02 AM, Babu Moger wrote:
> 
>>>> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
>>>> index f136ac046851..30bf919edfda 100644
>>>> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
>>>> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
>>>> @@ -813,6 +813,12 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r)
>>>>  		return ret;
>>>>  
>>>>  	if (rdt_cpu_has(X86_FEATURE_BMEC)) {
>>>> +		u32 eax, ebx, ecx, edx;
>>>> +
>>>> +		/* Detect list of bandwidth sources that can be tracked */
>>>> +		cpuid_count(0x80000020, 3, &eax, &ebx, &ecx, &edx);
>>>> +		hw_res->event_mask = ecx;
>>>> +
>>>
>>> This has the same issue as I mentioned in V1. Note that this treats
>>> reserved bits as valid values. I think this is a risky thing to do. For example
>>> when this code is run on future hardware the currently reserved bits may have
>>> values with different meaning than what this code uses it for.
>>
>> Sure. Will use the mask MAX_EVT_CONFIG_BITS.
>>               hw_res->mbm_cfg_mask = ecx &  MAX_EVT_CONFIG_BITS;
>>
>>>
>>>>  		if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) {
>>>>  			mbm_total_event.configurable = true;
>>>>  			mbm_config_rftype_init("mbm_total_bytes_config");
>>>> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>>>> index 69a1de92384a..8a1e9fdab974 100644
>>>> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>>>> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>>>> @@ -1537,17 +1537,14 @@ static void mon_event_config_read(void *info)
>>>>  {
>>>>  	struct mon_config_info *mon_info = info;
>>>>  	unsigned int index;
>>>> -	u64 msrval;
>>>> +	u32 h;
>>>>  
>>>>  	index = mon_event_config_index_get(mon_info->evtid);
>>>>  	if (index == INVALID_CONFIG_INDEX) {
>>>>  		pr_warn_once("Invalid event id %d\n", mon_info->evtid);
>>>>  		return;
>>>>  	}
>>>> -	rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval);
>>>> -
>>>> -	/* Report only the valid event configuration bits */
>>>> -	mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS;
>>>> +	rdmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, h);
>>>
>>> I do not think this code needed to be changed. We do not want to treat
>>> reserved bits as valid values. 
>>
>> The logic is still the same. We don't have access to rdt_hw_resource in
>> this function. So, I just moved the masking to mbm_config_show while printing.
> 
> Why do you need access to rdt_hw_resource? This comment is not about the bandwidth
> events supported by the device but instead the bits used to represent these events.
> This is the same issue as in rdt_get_mon_l3_config. The above change returns
> reserved bits as valid while the original code ensured that only bits used for
> field are returned (through the usage of MAX_EVT_CONFIG_BITS).

We are already saving the valid bits in hw_res->mbm_cfg_mask during the init.

hw_res->mbm_cfg_mask = ecx &  MAX_EVT_CONFIG_BITS;

I thought we can use it here directly to mask any unsupported bits. So, I
re-arranged the code here.
  
Reinette Chatre Jan. 3, 2024, 9:40 p.m. UTC | #5
Hi Babu,

On 1/3/2024 1:03 PM, Moger, Babu wrote:
> On 1/3/24 12:38, Reinette Chatre wrote:
>> On 1/2/2024 12:00 PM, Moger, Babu wrote:
>>> On 12/14/23 19:24, Reinette Chatre wrote:
>>>> On 12/12/2023 10:02 AM, Babu Moger wrote:
>>
>>>>> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
>>>>> index f136ac046851..30bf919edfda 100644
>>>>> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
>>>>> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
>>>>> @@ -813,6 +813,12 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r)
>>>>>  		return ret;
>>>>>  
>>>>>  	if (rdt_cpu_has(X86_FEATURE_BMEC)) {
>>>>> +		u32 eax, ebx, ecx, edx;
>>>>> +
>>>>> +		/* Detect list of bandwidth sources that can be tracked */
>>>>> +		cpuid_count(0x80000020, 3, &eax, &ebx, &ecx, &edx);
>>>>> +		hw_res->event_mask = ecx;
>>>>> +
>>>>
>>>> This has the same issue as I mentioned in V1. Note that this treats
>>>> reserved bits as valid values. I think this is a risky thing to do. For example
>>>> when this code is run on future hardware the currently reserved bits may have
>>>> values with different meaning than what this code uses it for.
>>>
>>> Sure. Will use the mask MAX_EVT_CONFIG_BITS.
>>>               hw_res->mbm_cfg_mask = ecx &  MAX_EVT_CONFIG_BITS;
>>>
>>>>
>>>>>  		if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) {
>>>>>  			mbm_total_event.configurable = true;
>>>>>  			mbm_config_rftype_init("mbm_total_bytes_config");
>>>>> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>>>>> index 69a1de92384a..8a1e9fdab974 100644
>>>>> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>>>>> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>>>>> @@ -1537,17 +1537,14 @@ static void mon_event_config_read(void *info)
>>>>>  {
>>>>>  	struct mon_config_info *mon_info = info;
>>>>>  	unsigned int index;
>>>>> -	u64 msrval;
>>>>> +	u32 h;
>>>>>  
>>>>>  	index = mon_event_config_index_get(mon_info->evtid);
>>>>>  	if (index == INVALID_CONFIG_INDEX) {
>>>>>  		pr_warn_once("Invalid event id %d\n", mon_info->evtid);
>>>>>  		return;
>>>>>  	}
>>>>> -	rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval);
>>>>> -
>>>>> -	/* Report only the valid event configuration bits */
>>>>> -	mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS;
>>>>> +	rdmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, h);
>>>>
>>>> I do not think this code needed to be changed. We do not want to treat
>>>> reserved bits as valid values. 
>>>
>>> The logic is still the same. We don't have access to rdt_hw_resource in
>>> this function. So, I just moved the masking to mbm_config_show while printing.
>>
>> Why do you need access to rdt_hw_resource? This comment is not about the bandwidth
>> events supported by the device but instead the bits used to represent these events.
>> This is the same issue as in rdt_get_mon_l3_config. The above change returns
>> reserved bits as valid while the original code ensured that only bits used for
>> field are returned (through the usage of MAX_EVT_CONFIG_BITS).
> 
> We are already saving the valid bits in hw_res->mbm_cfg_mask during the init.
> 
> hw_res->mbm_cfg_mask = ecx &  MAX_EVT_CONFIG_BITS;
> 
> I thought we can use it here directly to mask any unsupported bits. So, I
> re-arranged the code here.

I am not sure where you mean when you say "use it here" since mbm_cfg_mask is not
used in mon_event_config_read(). My comment is related to mon_event_config_read()
that can reasonably be expected to, and thus should, return the current "mon event
config" value and nothing more. 

Reinette
  
Moger, Babu Jan. 4, 2024, 1:48 p.m. UTC | #6
Hi Reinette,

On 1/3/24 15:40, Reinette Chatre wrote:
> Hi Babu,
> 
> On 1/3/2024 1:03 PM, Moger, Babu wrote:
>> On 1/3/24 12:38, Reinette Chatre wrote:
>>> On 1/2/2024 12:00 PM, Moger, Babu wrote:
>>>> On 12/14/23 19:24, Reinette Chatre wrote:
>>>>> On 12/12/2023 10:02 AM, Babu Moger wrote:
>>>
>>>>>> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
>>>>>> index f136ac046851..30bf919edfda 100644
>>>>>> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
>>>>>> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
>>>>>> @@ -813,6 +813,12 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r)
>>>>>>  		return ret;
>>>>>>  
>>>>>>  	if (rdt_cpu_has(X86_FEATURE_BMEC)) {
>>>>>> +		u32 eax, ebx, ecx, edx;
>>>>>> +
>>>>>> +		/* Detect list of bandwidth sources that can be tracked */
>>>>>> +		cpuid_count(0x80000020, 3, &eax, &ebx, &ecx, &edx);
>>>>>> +		hw_res->event_mask = ecx;
>>>>>> +
>>>>>
>>>>> This has the same issue as I mentioned in V1. Note that this treats
>>>>> reserved bits as valid values. I think this is a risky thing to do. For example
>>>>> when this code is run on future hardware the currently reserved bits may have
>>>>> values with different meaning than what this code uses it for.
>>>>
>>>> Sure. Will use the mask MAX_EVT_CONFIG_BITS.
>>>>               hw_res->mbm_cfg_mask = ecx &  MAX_EVT_CONFIG_BITS;
>>>>
>>>>>
>>>>>>  		if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) {
>>>>>>  			mbm_total_event.configurable = true;
>>>>>>  			mbm_config_rftype_init("mbm_total_bytes_config");
>>>>>> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>>>>>> index 69a1de92384a..8a1e9fdab974 100644
>>>>>> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>>>>>> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>>>>>> @@ -1537,17 +1537,14 @@ static void mon_event_config_read(void *info)
>>>>>>  {
>>>>>>  	struct mon_config_info *mon_info = info;
>>>>>>  	unsigned int index;
>>>>>> -	u64 msrval;
>>>>>> +	u32 h;
>>>>>>  
>>>>>>  	index = mon_event_config_index_get(mon_info->evtid);
>>>>>>  	if (index == INVALID_CONFIG_INDEX) {
>>>>>>  		pr_warn_once("Invalid event id %d\n", mon_info->evtid);
>>>>>>  		return;
>>>>>>  	}
>>>>>> -	rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval);
>>>>>> -
>>>>>> -	/* Report only the valid event configuration bits */
>>>>>> -	mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS;
>>>>>> +	rdmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, h);
>>>>>
>>>>> I do not think this code needed to be changed. We do not want to treat
>>>>> reserved bits as valid values. 
>>>>
>>>> The logic is still the same. We don't have access to rdt_hw_resource in
>>>> this function. So, I just moved the masking to mbm_config_show while printing.
>>>
>>> Why do you need access to rdt_hw_resource? This comment is not about the bandwidth
>>> events supported by the device but instead the bits used to represent these events.
>>> This is the same issue as in rdt_get_mon_l3_config. The above change returns
>>> reserved bits as valid while the original code ensured that only bits used for
>>> field are returned (through the usage of MAX_EVT_CONFIG_BITS).
>>
>> We are already saving the valid bits in hw_res->mbm_cfg_mask during the init.
>>
>> hw_res->mbm_cfg_mask = ecx &  MAX_EVT_CONFIG_BITS;
>>
>> I thought we can use it here directly to mask any unsupported bits. So, I
>> re-arranged the code here.
> 
> I am not sure where you mean when you say "use it here" since mbm_cfg_mask is not
> used in mon_event_config_read(). My comment is related to mon_event_config_read()
> that can reasonably be expected to, and thus should, return the current "mon event
> config" value and nothing more. 
> 

Ok. Sure. Lets keep the same code as before.
  

Patch

diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index d2979748fae4..3e2f505614d8 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -50,9 +50,6 @@ 
 /* Dirty Victims to All Types of Memory */
 #define DIRTY_VICTIMS_TO_ALL_MEM	BIT(6)
 
-/* Max event bits supported */
-#define MAX_EVT_CONFIG_BITS		GENMASK(6, 0)
-
 struct rdt_fs_context {
 	struct kernfs_fs_context	kfc;
 	bool				enable_cdpl2;
@@ -394,6 +391,7 @@  struct rdt_parse_data {
  * @msr_update:		Function pointer to update QOS MSRs
  * @mon_scale:		cqm counter * mon_scale = occupancy in bytes
  * @mbm_width:		Monitor width, to detect and correct for overflow.
+ * @event_mask:		Max supported event bitmask.
  * @cdp_enabled:	CDP state of this resource
  *
  * Members of this structure are either private to the architecture
@@ -408,6 +406,7 @@  struct rdt_hw_resource {
 				 struct rdt_resource *r);
 	unsigned int		mon_scale;
 	unsigned int		mbm_width;
+	unsigned int		event_mask;
 	bool			cdp_enabled;
 };
 
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index f136ac046851..30bf919edfda 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -813,6 +813,12 @@  int __init rdt_get_mon_l3_config(struct rdt_resource *r)
 		return ret;
 
 	if (rdt_cpu_has(X86_FEATURE_BMEC)) {
+		u32 eax, ebx, ecx, edx;
+
+		/* Detect list of bandwidth sources that can be tracked */
+		cpuid_count(0x80000020, 3, &eax, &ebx, &ecx, &edx);
+		hw_res->event_mask = ecx;
+
 		if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) {
 			mbm_total_event.configurable = true;
 			mbm_config_rftype_init("mbm_total_bytes_config");
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 69a1de92384a..8a1e9fdab974 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -1537,17 +1537,14 @@  static void mon_event_config_read(void *info)
 {
 	struct mon_config_info *mon_info = info;
 	unsigned int index;
-	u64 msrval;
+	u32 h;
 
 	index = mon_event_config_index_get(mon_info->evtid);
 	if (index == INVALID_CONFIG_INDEX) {
 		pr_warn_once("Invalid event id %d\n", mon_info->evtid);
 		return;
 	}
-	rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval);
-
-	/* Report only the valid event configuration bits */
-	mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS;
+	rdmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, h);
 }
 
 static void mondata_config_read(struct rdt_domain *d, struct mon_config_info *mon_info)
@@ -1557,6 +1554,7 @@  static void mondata_config_read(struct rdt_domain *d, struct mon_config_info *mo
 
 static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid)
 {
+	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
 	struct mon_config_info mon_info = {0};
 	struct rdt_domain *dom;
 	bool sep = false;
@@ -1571,7 +1569,9 @@  static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid
 		mon_info.evtid = evtid;
 		mondata_config_read(dom, &mon_info);
 
-		seq_printf(s, "%d=0x%02x", dom->id, mon_info.mon_config);
+		/* Report only the valid event configuration bits */
+		seq_printf(s, "%d=0x%02x", dom->id,
+			   mon_info.mon_config & hw_res->event_mask);
 		sep = true;
 	}
 	seq_puts(s, "\n");
@@ -1617,12 +1617,14 @@  static void mon_event_config_write(void *info)
 static int mbm_config_write_domain(struct rdt_resource *r,
 				   struct rdt_domain *d, u32 evtid, u32 val)
 {
+	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
 	struct mon_config_info mon_info = {0};
 	int ret = 0;
 
 	/* mon_config cannot be more than the supported set of events */
-	if (val > MAX_EVT_CONFIG_BITS) {
-		rdt_last_cmd_puts("Invalid event configuration\n");
+	if ((val & hw_res->event_mask) != val) {
+		rdt_last_cmd_printf("Invalid input: The maximum valid bitmask is 0x%02x\n",
+				    hw_res->event_mask);
 		return -EINVAL;
 	}