[v6,1/3] x86/resctrl: Add mount option "mba_MBps_event"

Message ID 20231207195613.153980-2-tony.luck@intel.com
State New
Headers
Series x86/resctrl: mba_MBps enhancements |

Commit Message

Luck, Tony Dec. 7, 2023, 7:56 p.m. UTC
  The MBA Software Controller(mba_sc) is a feedback loop that uses
measurements of local memory bandwidth to adjust MBA throttling levels
to keep workloads in a resctrl group within a target bandwidth set in
the schemata file.

Users may want to use total memory bandwidth instead of local to handle
workloads that have poor NUMA localization.

Add a new mount option "mba_MBps_event={event_name}" where event_name
is one of "mbm_Local_bytes" or "mbm_total_bytes" that allows a user to
specify which monitoring event to use.

Update the once-per-second polling code to use the chosen event (local
or total memory bandwidth).

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/linux/resctrl.h                |  2 +
 arch/x86/kernel/cpu/resctrl/internal.h |  3 +-
 arch/x86/kernel/cpu/resctrl/monitor.c  | 21 +++++----
 arch/x86/kernel/cpu/resctrl/rdtgroup.c | 61 +++++++++++++++++++++-----
 4 files changed, 63 insertions(+), 24 deletions(-)
  

Comments

Peter Newman Dec. 8, 2023, 6:17 p.m. UTC | #1
Hi Tony,

On Thu, Dec 7, 2023 at 11:56 AM Tony Luck <tony.luck@intel.com> wrote:
>
> The MBA Software Controller(mba_sc) is a feedback loop that uses
> measurements of local memory bandwidth to adjust MBA throttling levels
> to keep workloads in a resctrl group within a target bandwidth set in
> the schemata file.
>
> Users may want to use total memory bandwidth instead of local to handle
> workloads that have poor NUMA localization.
>
> Add a new mount option "mba_MBps_event={event_name}" where event_name
> is one of "mbm_Local_bytes" or "mbm_total_bytes" that allows a user to

It's "mbm_local_bytes" in the matching logic later on.


> specify which monitoring event to use.
>
> Update the once-per-second polling code to use the chosen event (local
> or total memory bandwidth).
>
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
>  include/linux/resctrl.h                |  2 +
>  arch/x86/kernel/cpu/resctrl/internal.h |  3 +-
>  arch/x86/kernel/cpu/resctrl/monitor.c  | 21 +++++----
>  arch/x86/kernel/cpu/resctrl/rdtgroup.c | 61 +++++++++++++++++++++-----
>  4 files changed, 63 insertions(+), 24 deletions(-)
>
> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index 66942d7fba7f..1feb3b2e64fa 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -129,6 +129,7 @@ enum membw_throttle_mode {
>   * @throttle_mode:     Bandwidth throttling mode when threads request
>   *                     different memory bandwidths
>   * @mba_sc:            True if MBA software controller(mba_sc) is enabled
> + * @mba_mbps_event:    Event (local or total) for mba_sc
>   * @mb_map:            Mapping of memory B/W percentage to memory B/W delay
>   */
>  struct resctrl_membw {
> @@ -138,6 +139,7 @@ struct resctrl_membw {
>         bool                            arch_needs_linear;
>         enum membw_throttle_mode        throttle_mode;
>         bool                            mba_sc;
> +       enum resctrl_event_id           mba_mbps_event;
>         u32                             *mb_map;
>  };
>
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index a4f1aa15f0a2..8b9b8f664324 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -58,7 +58,8 @@ struct rdt_fs_context {
>         struct kernfs_fs_context        kfc;
>         bool                            enable_cdpl2;
>         bool                            enable_cdpl3;
> -       bool                            enable_mba_mbps;
> +       bool                            enable_mba_mbps_local;
> +       bool                            enable_mba_mbps_total;
>         bool                            enable_debug;
>  };
>
> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> index f136ac046851..d9e590f1cbc3 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -431,9 +431,10 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
>   */
>  static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
>  {
> -       struct mbm_state *m = &rr->d->mbm_local[rmid];
>         u64 cur_bw, bytes, cur_bytes;
> +       struct mbm_state *m;
>
> +       m = get_mbm_state(rr->d, rmid, rr->evtid);

WARN_ON(m == NULL) since we assume the caller has confirmed rr->evtid
is an MBM event?

>         cur_bytes = rr->val;
>         bytes = cur_bytes - m->prev_bw_bytes;
>         m->prev_bw_bytes = cur_bytes;
> @@ -521,19 +522,21 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
>         u32 closid, rmid, cur_msr_val, new_msr_val;
>         struct mbm_state *pmbm_data, *cmbm_data;
>         u32 cur_bw, delta_bw, user_bw;
> +       enum resctrl_event_id evt_id;
>         struct rdt_resource *r_mba;
>         struct rdt_domain *dom_mba;
>         struct list_head *head;
>         struct rdtgroup *entry;
>
> -       if (!is_mbm_local_enabled())
> +       if (!is_mbm_enabled())
>                 return;
>
>         r_mba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
> +       evt_id = r_mba->membw.mba_mbps_event;
>
>         closid = rgrp->closid;
>         rmid = rgrp->mon.rmid;
> -       pmbm_data = &dom_mbm->mbm_local[rmid];
> +       pmbm_data = get_mbm_state(dom_mbm, rmid, evt_id);

One defensive WARN_ON((!pmbm_data) for this function to ensure evt_id
is valid for this call and the ones in the loop below?

>
>         dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba);
>         if (!dom_mba) {
> @@ -553,7 +556,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
>          */
>         head = &rgrp->mon.crdtgrp_list;
>         list_for_each_entry(entry, head, mon.crdtgrp_list) {
> -               cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
> +               cmbm_data = get_mbm_state(dom_mbm, entry->mon.rmid, evt_id);
>                 cur_bw += cmbm_data->prev_bw;
>                 delta_bw += cmbm_data->delta_bw;
>         }
> @@ -616,18 +619,14 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
>                 rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID;
>                 rr.val = 0;
>                 __mon_event_count(rmid, &rr);
> +               if (is_mba_sc(NULL) && rr.evtid == r->membw.mba_mbps_event)
> +                       mbm_bw_count(rmid, &rr);
>         }
>         if (is_mbm_local_enabled()) {
>                 rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
>                 rr.val = 0;
>                 __mon_event_count(rmid, &rr);
> -
> -               /*
> -                * Call the MBA software controller only for the
> -                * control groups and when user has enabled
> -                * the software controller explicitly.
> -                */
> -               if (is_mba_sc(NULL))
> +               if (is_mba_sc(NULL) && rr.evtid == r->membw.mba_mbps_event)
>                         mbm_bw_count(rmid, &rr);
>         }
>  }
> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> index 69a1de92384a..5f64a0b2597c 100644
> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> @@ -2294,7 +2294,7 @@ static bool supports_mba_mbps(void)
>  {
>         struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
>
> -       return (is_mbm_local_enabled() &&
> +       return (is_mbm_enabled() &&
>                 r->alloc_capable && is_mba_linear());
>  }
>
> @@ -2302,7 +2302,7 @@ static bool supports_mba_mbps(void)
>   * Enable or disable the MBA software controller
>   * which helps user specify bandwidth in MBps.
>   */
> -static int set_mba_sc(bool mba_sc)
> +static int set_mba_sc(bool mba_sc, enum resctrl_event_id mba_mbps_event)
>  {
>         struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
>         u32 num_closid = resctrl_arch_get_num_closid(r);
> @@ -2313,6 +2313,7 @@ static int set_mba_sc(bool mba_sc)
>                 return -EINVAL;
>
>         r->membw.mba_sc = mba_sc;
> +       r->membw.mba_mbps_event = mba_mbps_event;
>
>         list_for_each_entry(d, &r->domains, list) {
>                 for (i = 0; i < num_closid; i++)
> @@ -2445,13 +2446,14 @@ static void rdt_disable_ctx(void)
>  {
>         resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
>         resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
> -       set_mba_sc(false);
> +       set_mba_sc(false, QOS_L3_MBM_LOCAL_EVENT_ID);
>
>         resctrl_debug = false;
>  }
>
>  static int rdt_enable_ctx(struct rdt_fs_context *ctx)
>  {
> +       enum resctrl_event_id mba_mbps_event;
>         int ret = 0;
>
>         if (ctx->enable_cdpl2) {
> @@ -2466,8 +2468,12 @@ static int rdt_enable_ctx(struct rdt_fs_context *ctx)
>                         goto out_cdpl2;
>         }
>
> -       if (ctx->enable_mba_mbps) {
> -               ret = set_mba_sc(true);
> +       if (ctx->enable_mba_mbps_local || ctx->enable_mba_mbps_total) {
> +               if (ctx->enable_mba_mbps_total)
> +                       mba_mbps_event = QOS_L3_MBM_TOTAL_EVENT_ID;
> +               else
> +                       mba_mbps_event = QOS_L3_MBM_LOCAL_EVENT_ID;

Total takes precedence over local when the user picks both.

> +               ret = set_mba_sc(true, mba_mbps_event);
>                 if (ret)
>                         goto out_cdpl3;
>         }
> @@ -2683,15 +2689,17 @@ enum rdt_param {
>         Opt_cdp,
>         Opt_cdpl2,
>         Opt_mba_mbps,
> +       Opt_mba_mbps_event,
>         Opt_debug,
>         nr__rdt_params
>  };
>
>  static const struct fs_parameter_spec rdt_fs_parameters[] = {
> -       fsparam_flag("cdp",             Opt_cdp),
> -       fsparam_flag("cdpl2",           Opt_cdpl2),
> -       fsparam_flag("mba_MBps",        Opt_mba_mbps),
> -       fsparam_flag("debug",           Opt_debug),
> +       fsparam_flag("cdp",                     Opt_cdp),
> +       fsparam_flag("cdpl2",                   Opt_cdpl2),
> +       fsparam_flag("mba_MBps",                Opt_mba_mbps),
> +       fsparam_string("mba_MBps_event",        Opt_mba_mbps_event),
> +       fsparam_flag("debug",                   Opt_debug),
>         {}
>  };
>
> @@ -2715,7 +2723,25 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
>         case Opt_mba_mbps:
>                 if (!supports_mba_mbps())
>                         return -EINVAL;
> -               ctx->enable_mba_mbps = true;
> +               if (is_mbm_local_enabled())
> +                       ctx->enable_mba_mbps_local = true;
> +               else
> +                       return -EINVAL;
> +               return 0;
> +       case Opt_mba_mbps_event:
> +               if (!supports_mba_mbps())
> +                       return -EINVAL;
> +               if (!strcmp("mbm_local_bytes", param->string)) {
> +                       if (!is_mbm_local_enabled())
> +                               return -EINVAL;
> +                       ctx->enable_mba_mbps_local = true;
> +               } else if (!strcmp("mbm_total_bytes", param->string)) {
> +                       if (!is_mbm_total_enabled())
> +                               return -EINVAL;
> +                       ctx->enable_mba_mbps_total = true;
> +               } else {
> +                       return -EINVAL;

It looks like if I pass
"mba_MBps_event=mbm_total_bytes,mba_MBps_event=mbm_local_bytes" I can
set both flags true.

> +               }
>                 return 0;
>         case Opt_debug:
>                 ctx->enable_debug = true;
> @@ -3780,16 +3806,27 @@ static int rdtgroup_rename(struct kernfs_node *kn,
>         return ret;
>  }
>
> +static char *mba_sc_event_opt_name(struct rdt_resource *r)
> +{
> +       if (r->membw.mba_mbps_event == QOS_L3_MBM_LOCAL_EVENT_ID)
> +               return ",mba_MBps_event=mbm_local_bytes";
> +       else if (r->membw.mba_mbps_event == QOS_L3_MBM_TOTAL_EVENT_ID)
> +               return ",mba_MBps_event=mbm_total_bytes";
> +       return "";
> +}
> +
>  static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
>  {
> +       struct rdt_resource *r_mba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
> +
>         if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
>                 seq_puts(seq, ",cdp");
>
>         if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
>                 seq_puts(seq, ",cdpl2");
>
> -       if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl))
> -               seq_puts(seq, ",mba_MBps");
> +       if (is_mba_sc(r_mba))
> +               seq_puts(seq, mba_sc_event_opt_name(r_mba));
>
>         if (resctrl_debug)
>                 seq_puts(seq, ",debug");
> --
> 2.41.0
>

Consider the setting-both-events quirk and a little bit of defensive
programming for get_mbm_data() returning NULL.

Assuming the case of "Local" is fixed in the commit message:

Reviewed-by: Peter Newman <peternewman@google.com>

Thanks!
  
Moger, Babu Dec. 8, 2023, 6:29 p.m. UTC | #2
Hi Tony,

On 12/7/2023 1:56 PM, Tony Luck wrote:
> The MBA Software Controller(mba_sc) is a feedback loop that uses
> measurements of local memory bandwidth to adjust MBA throttling levels
> to keep workloads in a resctrl group within a target bandwidth set in
> the schemata file.
>
> Users may want to use total memory bandwidth instead of local to handle
> workloads that have poor NUMA localization.
>
> Add a new mount option "mba_MBps_event={event_name}" where event_name
> is one of "mbm_Local_bytes" or "mbm_total_bytes" that allows a user to
> specify which monitoring event to use.
>
> Update the once-per-second polling code to use the chosen event (local
> or total memory bandwidth).
>
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
>   include/linux/resctrl.h                |  2 +
>   arch/x86/kernel/cpu/resctrl/internal.h |  3 +-
>   arch/x86/kernel/cpu/resctrl/monitor.c  | 21 +++++----
>   arch/x86/kernel/cpu/resctrl/rdtgroup.c | 61 +++++++++++++++++++++-----
>   4 files changed, 63 insertions(+), 24 deletions(-)
>
> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index 66942d7fba7f..1feb3b2e64fa 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -129,6 +129,7 @@ enum membw_throttle_mode {
>    * @throttle_mode:	Bandwidth throttling mode when threads request
>    *			different memory bandwidths
>    * @mba_sc:		True if MBA software controller(mba_sc) is enabled
> + * @mba_mbps_event:	Event (local or total) for mba_sc
>    * @mb_map:		Mapping of memory B/W percentage to memory B/W delay
>    */
>   struct resctrl_membw {
> @@ -138,6 +139,7 @@ struct resctrl_membw {
>   	bool				arch_needs_linear;
>   	enum membw_throttle_mode	throttle_mode;
>   	bool				mba_sc;
> +	enum resctrl_event_id		mba_mbps_event;
>   	u32				*mb_map;
>   };
>   
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index a4f1aa15f0a2..8b9b8f664324 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -58,7 +58,8 @@ struct rdt_fs_context {
>   	struct kernfs_fs_context	kfc;
>   	bool				enable_cdpl2;
>   	bool				enable_cdpl3;
> -	bool				enable_mba_mbps;
> +	bool				enable_mba_mbps_local;
> +	bool				enable_mba_mbps_total;
>   	bool				enable_debug;
>   };
>   
> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> index f136ac046851..d9e590f1cbc3 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -431,9 +431,10 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
>    */
>   static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
>   {
> -	struct mbm_state *m = &rr->d->mbm_local[rmid];
>   	u64 cur_bw, bytes, cur_bytes;
> +	struct mbm_state *m;
>   
> +	m = get_mbm_state(rr->d, rmid, rr->evtid);
>   	cur_bytes = rr->val;
>   	bytes = cur_bytes - m->prev_bw_bytes;
>   	m->prev_bw_bytes = cur_bytes;
> @@ -521,19 +522,21 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
>   	u32 closid, rmid, cur_msr_val, new_msr_val;
>   	struct mbm_state *pmbm_data, *cmbm_data;
>   	u32 cur_bw, delta_bw, user_bw;
> +	enum resctrl_event_id evt_id;
>   	struct rdt_resource *r_mba;
>   	struct rdt_domain *dom_mba;
>   	struct list_head *head;
>   	struct rdtgroup *entry;
>   
> -	if (!is_mbm_local_enabled())
> +	if (!is_mbm_enabled())
>   		return;
>   
>   	r_mba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
> +	evt_id = r_mba->membw.mba_mbps_event;
>   
>   	closid = rgrp->closid;
>   	rmid = rgrp->mon.rmid;
> -	pmbm_data = &dom_mbm->mbm_local[rmid];
> +	pmbm_data = get_mbm_state(dom_mbm, rmid, evt_id);
>   
>   	dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba);
>   	if (!dom_mba) {
> @@ -553,7 +556,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
>   	 */
>   	head = &rgrp->mon.crdtgrp_list;
>   	list_for_each_entry(entry, head, mon.crdtgrp_list) {
> -		cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
> +		cmbm_data = get_mbm_state(dom_mbm, entry->mon.rmid, evt_id);
>   		cur_bw += cmbm_data->prev_bw;
>   		delta_bw += cmbm_data->delta_bw;
>   	}
> @@ -616,18 +619,14 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
>   		rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID;
>   		rr.val = 0;
>   		__mon_event_count(rmid, &rr);
> +		if (is_mba_sc(NULL) && rr.evtid == r->membw.mba_mbps_event)
> +			mbm_bw_count(rmid, &rr);
>   	}
>   	if (is_mbm_local_enabled()) {
>   		rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
>   		rr.val = 0;
>   		__mon_event_count(rmid, &rr);
> -
> -		/*
> -		 * Call the MBA software controller only for the
> -		 * control groups and when user has enabled
> -		 * the software controller explicitly.
> -		 */
> -		if (is_mba_sc(NULL))
> +		if (is_mba_sc(NULL) && rr.evtid == r->membw.mba_mbps_event)
>   			mbm_bw_count(rmid, &rr);
>   	}
>   }
> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> index 69a1de92384a..5f64a0b2597c 100644
> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> @@ -2294,7 +2294,7 @@ static bool supports_mba_mbps(void)
>   {
>   	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
>   
> -	return (is_mbm_local_enabled() &&
> +	return (is_mbm_enabled() &&
>   		r->alloc_capable && is_mba_linear());
>   }
>   
> @@ -2302,7 +2302,7 @@ static bool supports_mba_mbps(void)
>    * Enable or disable the MBA software controller
>    * which helps user specify bandwidth in MBps.
>    */
> -static int set_mba_sc(bool mba_sc)
> +static int set_mba_sc(bool mba_sc, enum resctrl_event_id mba_mbps_event)
>   {
>   	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
>   	u32 num_closid = resctrl_arch_get_num_closid(r);
> @@ -2313,6 +2313,7 @@ static int set_mba_sc(bool mba_sc)
>   		return -EINVAL;
>   
>   	r->membw.mba_sc = mba_sc;
> +	r->membw.mba_mbps_event = mba_mbps_event;
>   
>   	list_for_each_entry(d, &r->domains, list) {
>   		for (i = 0; i < num_closid; i++)
> @@ -2445,13 +2446,14 @@ static void rdt_disable_ctx(void)
>   {
>   	resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
>   	resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
> -	set_mba_sc(false);
> +	set_mba_sc(false, QOS_L3_MBM_LOCAL_EVENT_ID);

This is kind of miss leading. Why do you pass 
"QOS_L3_MBM_LOCAL_EVENT_ID" here?

If you move the following initialization to rdt_enable_ctx, then you 
don't need to pass the second argument.

r->membw.mba_mbps_event = mba_mbps_event;

Thanks
Babu

>   
>   	resctrl_debug = false;
>   }
>   
>   static int rdt_enable_ctx(struct rdt_fs_context *ctx)
>   {
> +	enum resctrl_event_id mba_mbps_event;
>   	int ret = 0;
>   
>   	if (ctx->enable_cdpl2) {
> @@ -2466,8 +2468,12 @@ static int rdt_enable_ctx(struct rdt_fs_context *ctx)
>   			goto out_cdpl2;
>   	}
>   
> -	if (ctx->enable_mba_mbps) {
> -		ret = set_mba_sc(true);
> +	if (ctx->enable_mba_mbps_local || ctx->enable_mba_mbps_total) {
> +		if (ctx->enable_mba_mbps_total)
> +			mba_mbps_event = QOS_L3_MBM_TOTAL_EVENT_ID;
> +		else
> +			mba_mbps_event = QOS_L3_MBM_LOCAL_EVENT_ID;
> +		ret = set_mba_sc(true, mba_mbps_event);
>   		if (ret)
>   			goto out_cdpl3;
>   	}
> @@ -2683,15 +2689,17 @@ enum rdt_param {
>   	Opt_cdp,
>   	Opt_cdpl2,
>   	Opt_mba_mbps,
> +	Opt_mba_mbps_event,
>   	Opt_debug,
>   	nr__rdt_params
>   };
>   
>   static const struct fs_parameter_spec rdt_fs_parameters[] = {
> -	fsparam_flag("cdp",		Opt_cdp),
> -	fsparam_flag("cdpl2",		Opt_cdpl2),
> -	fsparam_flag("mba_MBps",	Opt_mba_mbps),
> -	fsparam_flag("debug",		Opt_debug),
> +	fsparam_flag("cdp",			Opt_cdp),
> +	fsparam_flag("cdpl2",			Opt_cdpl2),
> +	fsparam_flag("mba_MBps",		Opt_mba_mbps),
> +	fsparam_string("mba_MBps_event",	Opt_mba_mbps_event),
> +	fsparam_flag("debug",			Opt_debug),
>   	{}
>   };
>   
> @@ -2715,7 +2723,25 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
>   	case Opt_mba_mbps:
>   		if (!supports_mba_mbps())
>   			return -EINVAL;
> -		ctx->enable_mba_mbps = true;
> +		if (is_mbm_local_enabled())
> +			ctx->enable_mba_mbps_local = true;
> +		else
> +			return -EINVAL;
> +		return 0;
> +	case Opt_mba_mbps_event:
> +		if (!supports_mba_mbps())
> +			return -EINVAL;
> +		if (!strcmp("mbm_local_bytes", param->string)) {
> +			if (!is_mbm_local_enabled())
> +				return -EINVAL;
> +			ctx->enable_mba_mbps_local = true;
> +		} else if (!strcmp("mbm_total_bytes", param->string)) {
> +			if (!is_mbm_total_enabled())
> +				return -EINVAL;
> +			ctx->enable_mba_mbps_total = true;
> +		} else {
> +			return -EINVAL;
> +		}
>   		return 0;
>   	case Opt_debug:
>   		ctx->enable_debug = true;
> @@ -3780,16 +3806,27 @@ static int rdtgroup_rename(struct kernfs_node *kn,
>   	return ret;
>   }
>   
> +static char *mba_sc_event_opt_name(struct rdt_resource *r)
> +{
> +	if (r->membw.mba_mbps_event == QOS_L3_MBM_LOCAL_EVENT_ID)
> +		return ",mba_MBps_event=mbm_local_bytes";
> +	else if (r->membw.mba_mbps_event == QOS_L3_MBM_TOTAL_EVENT_ID)
> +		return ",mba_MBps_event=mbm_total_bytes";
> +	return "";
> +}
> +
>   static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
>   {
> +	struct rdt_resource *r_mba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
> +
>   	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
>   		seq_puts(seq, ",cdp");
>   
>   	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
>   		seq_puts(seq, ",cdpl2");
>   
> -	if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl))
> -		seq_puts(seq, ",mba_MBps");
> +	if (is_mba_sc(r_mba))
> +		seq_puts(seq, mba_sc_event_opt_name(r_mba));
>   
>   	if (resctrl_debug)
>   		seq_puts(seq, ",debug");
  
Luck, Tony Dec. 8, 2023, 9:50 p.m. UTC | #3
On Fri, Dec 08, 2023 at 12:29:18PM -0600, Moger, Babu wrote:
> Hi Tony,
> 
> On 12/7/2023 1:56 PM, Tony Luck wrote:
> > -static int set_mba_sc(bool mba_sc)
> > +static int set_mba_sc(bool mba_sc, enum resctrl_event_id mba_mbps_event)
> >   {
> >   	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
> >   	u32 num_closid = resctrl_arch_get_num_closid(r);
> > @@ -2313,6 +2313,7 @@ static int set_mba_sc(bool mba_sc)
> >   		return -EINVAL;
> >   	r->membw.mba_sc = mba_sc;
> > +	r->membw.mba_mbps_event = mba_mbps_event;
> >   	list_for_each_entry(d, &r->domains, list) {
> >   		for (i = 0; i < num_closid; i++)
> > @@ -2445,13 +2446,14 @@ static void rdt_disable_ctx(void)
> >   {
> >   	resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
> >   	resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
> > -	set_mba_sc(false);
> > +	set_mba_sc(false, QOS_L3_MBM_LOCAL_EVENT_ID);
> 
> This is kind of miss leading. Why do you pass "QOS_L3_MBM_LOCAL_EVENT_ID"
> here?
> 
> If you move the following initialization to rdt_enable_ctx, then you don't
> need to pass the second argument.
> 
> r->membw.mba_mbps_event = mba_mbps_event;

Babu,

Yes. That was funky. I will drop the second argumen to set_mba_sc() and
move the initialization to rdt_enable_ctx()

Thnaks for the review.

-Tony
  
Luck, Tony Dec. 8, 2023, 9:57 p.m. UTC | #4
On Fri, Dec 08, 2023 at 10:17:08AM -0800, Peter Newman wrote:
> Hi Tony,
> 
> On Thu, Dec 7, 2023 at 11:56 AM Tony Luck <tony.luck@intel.com> wrote:
> >
> > The MBA Software Controller(mba_sc) is a feedback loop that uses
> > measurements of local memory bandwidth to adjust MBA throttling levels
> > to keep workloads in a resctrl group within a target bandwidth set in
> > the schemata file.
> >
> > Users may want to use total memory bandwidth instead of local to handle
> > workloads that have poor NUMA localization.
> >
> > Add a new mount option "mba_MBps_event={event_name}" where event_name
> > is one of "mbm_Local_bytes" or "mbm_total_bytes" that allows a user to
> 
> It's "mbm_local_bytes" in the matching logic later on.

Clearly my left hand operating the shift key is not well synchronized
with my right hand moving from "_" to "l".

Will fix.

> > diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> > index a4f1aa15f0a2..8b9b8f664324 100644
> > --- a/arch/x86/kernel/cpu/resctrl/internal.h
> > +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> > @@ -58,7 +58,8 @@ struct rdt_fs_context {
> >         struct kernfs_fs_context        kfc;
> >         bool                            enable_cdpl2;
> >         bool                            enable_cdpl3;
> > -       bool                            enable_mba_mbps;
> > +       bool                            enable_mba_mbps_local;
> > +       bool                            enable_mba_mbps_total;
> >         bool                            enable_debug;
> >  };
> >
> > diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> > index f136ac046851..d9e590f1cbc3 100644
> > --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> > +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> > @@ -431,9 +431,10 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
> >   */
> >  static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
> >  {
> > -       struct mbm_state *m = &rr->d->mbm_local[rmid];
> >         u64 cur_bw, bytes, cur_bytes;
> > +       struct mbm_state *m;
> >
> > +       m = get_mbm_state(rr->d, rmid, rr->evtid);
> 
> WARN_ON(m == NULL) since we assume the caller has confirmed rr->evtid
> is an MBM event?

Will add this WARN_ON (though I'll write "WARN_ON(!m);" rather than "== NULL").
> 
> >         cur_bytes = rr->val;
> >         bytes = cur_bytes - m->prev_bw_bytes;
> >         m->prev_bw_bytes = cur_bytes;
> > @@ -521,19 +522,21 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
> >         u32 closid, rmid, cur_msr_val, new_msr_val;
> >         struct mbm_state *pmbm_data, *cmbm_data;
> >         u32 cur_bw, delta_bw, user_bw;
> > +       enum resctrl_event_id evt_id;
> >         struct rdt_resource *r_mba;
> >         struct rdt_domain *dom_mba;
> >         struct list_head *head;
> >         struct rdtgroup *entry;
> >
> > -       if (!is_mbm_local_enabled())
> > +       if (!is_mbm_enabled())
> >                 return;
> >
> >         r_mba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
> > +       evt_id = r_mba->membw.mba_mbps_event;
> >
> >         closid = rgrp->closid;
> >         rmid = rgrp->mon.rmid;
> > -       pmbm_data = &dom_mbm->mbm_local[rmid];
> > +       pmbm_data = get_mbm_state(dom_mbm, rmid, evt_id);
> 
> One defensive WARN_ON((!pmbm_data) for this function to ensure evt_id
> is valid for this call and the ones in the loop below?

Will add this. And the WARN_ON(!cmbm_data); in the loop.

> > @@ -2466,8 +2468,12 @@ static int rdt_enable_ctx(struct rdt_fs_context *ctx)
> >                         goto out_cdpl2;
> >         }
> >
> > -       if (ctx->enable_mba_mbps) {
> > -               ret = set_mba_sc(true);
> > +       if (ctx->enable_mba_mbps_local || ctx->enable_mba_mbps_total) {
> > +               if (ctx->enable_mba_mbps_total)
> > +                       mba_mbps_event = QOS_L3_MBM_TOTAL_EVENT_ID;
> > +               else
> > +                       mba_mbps_event = QOS_L3_MBM_LOCAL_EVENT_ID;
> 
> Total takes precedence over local when the user picks both.

Harmless ... but see below.

> > +               ret = set_mba_sc(true, mba_mbps_event);
> >                 if (ret)
> >                         goto out_cdpl3;
> >         }
> > @@ -2683,15 +2689,17 @@ enum rdt_param {
> >         Opt_cdp,
> >         Opt_cdpl2,
> >         Opt_mba_mbps,
> > +       Opt_mba_mbps_event,
> >         Opt_debug,
> >         nr__rdt_params
> >  };
> >
> >  static const struct fs_parameter_spec rdt_fs_parameters[] = {
> > -       fsparam_flag("cdp",             Opt_cdp),
> > -       fsparam_flag("cdpl2",           Opt_cdpl2),
> > -       fsparam_flag("mba_MBps",        Opt_mba_mbps),
> > -       fsparam_flag("debug",           Opt_debug),
> > +       fsparam_flag("cdp",                     Opt_cdp),
> > +       fsparam_flag("cdpl2",                   Opt_cdpl2),
> > +       fsparam_flag("mba_MBps",                Opt_mba_mbps),
> > +       fsparam_string("mba_MBps_event",        Opt_mba_mbps_event),
> > +       fsparam_flag("debug",                   Opt_debug),
> >         {}
> >  };
> >
> > @@ -2715,7 +2723,25 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
> >         case Opt_mba_mbps:
> >                 if (!supports_mba_mbps())
> >                         return -EINVAL;
> > -               ctx->enable_mba_mbps = true;
> > +               if (is_mbm_local_enabled())
> > +                       ctx->enable_mba_mbps_local = true;
> > +               else
> > +                       return -EINVAL;
> > +               return 0;
> > +       case Opt_mba_mbps_event:
> > +               if (!supports_mba_mbps())
> > +                       return -EINVAL;
> > +               if (!strcmp("mbm_local_bytes", param->string)) {
> > +                       if (!is_mbm_local_enabled())
> > +                               return -EINVAL;
> > +                       ctx->enable_mba_mbps_local = true;
> > +               } else if (!strcmp("mbm_total_bytes", param->string)) {
> > +                       if (!is_mbm_total_enabled())
> > +                               return -EINVAL;
> > +                       ctx->enable_mba_mbps_total = true;
> > +               } else {
> > +                       return -EINVAL;
> 
> It looks like if I pass
> "mba_MBps_event=mbm_total_bytes,mba_MBps_event=mbm_local_bytes" I can
> set both flags true.

That's going to be confusing. I'll add code to stop the user from
passing both options.

> > --
> > 2.41.0
> >
> 
> Consider the setting-both-events quirk and a little bit of defensive
> programming for get_mbm_data() returning NULL.
> 
> Assuming the case of "Local" is fixed in the commit message:
> 
> Reviewed-by: Peter Newman <peternewman@google.com>

Thanks for reviewing, and for the tags for parts 2 & 3.

-Tony
  
Peter Newman Dec. 8, 2023, 10:09 p.m. UTC | #5
On Fri, Dec 8, 2023 at 1:57 PM Tony Luck <tony.luck@intel.com> wrote:
>
> On Fri, Dec 08, 2023 at 10:17:08AM -0800, Peter Newman wrote:
> > Hi Tony,
> >
> > On Thu, Dec 7, 2023 at 11:56 AM Tony Luck <tony.luck@intel.com> wrote:
> > > @@ -2715,7 +2723,25 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
> > >         case Opt_mba_mbps:
> > >                 if (!supports_mba_mbps())
> > >                         return -EINVAL;
> > > -               ctx->enable_mba_mbps = true;
> > > +               if (is_mbm_local_enabled())
> > > +                       ctx->enable_mba_mbps_local = true;
> > > +               else
> > > +                       return -EINVAL;
> > > +               return 0;
> > > +       case Opt_mba_mbps_event:
> > > +               if (!supports_mba_mbps())
> > > +                       return -EINVAL;
> > > +               if (!strcmp("mbm_local_bytes", param->string)) {
> > > +                       if (!is_mbm_local_enabled())
> > > +                               return -EINVAL;
> > > +                       ctx->enable_mba_mbps_local = true;
> > > +               } else if (!strcmp("mbm_total_bytes", param->string)) {
> > > +                       if (!is_mbm_total_enabled())
> > > +                               return -EINVAL;
> > > +                       ctx->enable_mba_mbps_total = true;
> > > +               } else {
> > > +                       return -EINVAL;
> >
> > It looks like if I pass
> > "mba_MBps_event=mbm_total_bytes,mba_MBps_event=mbm_local_bytes" I can
> > set both flags true.
>
> That's going to be confusing. I'll add code to stop the user from
> passing both options.

Also kind of confusing, after reading the second patch, I realized
"mba_MBps_event=mbm_total_bytes,mba_MBps" also results in both being
set. If you're able to fail the mount operation if both flags somehow
get set, that would address this one too.

-Peter
  
Luck, Tony Dec. 8, 2023, 10:37 p.m. UTC | #6
> > That's going to be confusing. I'll add code to stop the user from
> > passing both options.
>
> Also kind of confusing, after reading the second patch, I realized
> "mba_MBps_event=mbm_total_bytes,mba_MBps" also results in both being
> set. If you're able to fail the mount operation if both flags somehow
> get set, that would address this one too.

Peter,

Yes. That's possible too. I'll cover that case in the next version. I'll wait
until this gets to the top of Reinette's review queue before posting
again.

-Tony
  
Reinette Chatre Dec. 12, 2023, 5:54 p.m. UTC | #7
On 12/8/2023 2:09 PM, Peter Newman wrote:
> On Fri, Dec 8, 2023 at 1:57 PM Tony Luck <tony.luck@intel.com> wrote:
>>
>> On Fri, Dec 08, 2023 at 10:17:08AM -0800, Peter Newman wrote:
>>> Hi Tony,
>>>
>>> On Thu, Dec 7, 2023 at 11:56 AM Tony Luck <tony.luck@intel.com> wrote:
>>>> @@ -2715,7 +2723,25 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
>>>>         case Opt_mba_mbps:
>>>>                 if (!supports_mba_mbps())
>>>>                         return -EINVAL;
>>>> -               ctx->enable_mba_mbps = true;
>>>> +               if (is_mbm_local_enabled())
>>>> +                       ctx->enable_mba_mbps_local = true;
>>>> +               else
>>>> +                       return -EINVAL;
>>>> +               return 0;
>>>> +       case Opt_mba_mbps_event:
>>>> +               if (!supports_mba_mbps())
>>>> +                       return -EINVAL;
>>>> +               if (!strcmp("mbm_local_bytes", param->string)) {
>>>> +                       if (!is_mbm_local_enabled())
>>>> +                               return -EINVAL;
>>>> +                       ctx->enable_mba_mbps_local = true;
>>>> +               } else if (!strcmp("mbm_total_bytes", param->string)) {
>>>> +                       if (!is_mbm_total_enabled())
>>>> +                               return -EINVAL;
>>>> +                       ctx->enable_mba_mbps_total = true;
>>>> +               } else {
>>>> +                       return -EINVAL;
>>>
>>> It looks like if I pass
>>> "mba_MBps_event=mbm_total_bytes,mba_MBps_event=mbm_local_bytes" I can
>>> set both flags true.
>>
>> That's going to be confusing. I'll add code to stop the user from
>> passing both options.
> 
> Also kind of confusing, after reading the second patch, I realized
> "mba_MBps_event=mbm_total_bytes,mba_MBps" also results in both being
> set. If you're able to fail the mount operation if both flags somehow
> get set, that would address this one too.

Are two separate flags required? All existing options within struct rdt_fs_context
are of type bool but that does not imply that it is the required type for
all. 

Reinette
  
Reinette Chatre Dec. 12, 2023, 6:59 p.m. UTC | #8
Hi Tony,

I'm just adding a few minor comments to the bigger items already
mentioned ...

On 12/7/2023 11:56 AM, Tony Luck wrote:

...
> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index 66942d7fba7f..1feb3b2e64fa 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -129,6 +129,7 @@ enum membw_throttle_mode {
>   * @throttle_mode:	Bandwidth throttling mode when threads request
>   *			different memory bandwidths
>   * @mba_sc:		True if MBA software controller(mba_sc) is enabled
> + * @mba_mbps_event:	Event (local or total) for mba_sc

This is quite cryptic and does not add more than the variable name and type.
One example could be:
	"Monitoring event guiding feedback loop when @mba_sc is true."

Please feel free to improve.

...

> @@ -3780,16 +3806,27 @@ static int rdtgroup_rename(struct kernfs_node *kn,
>  	return ret;
>  }
>  
> +static char *mba_sc_event_opt_name(struct rdt_resource *r)

This can be "const char *".

Reinette
  
Luck, Tony Dec. 12, 2023, 8:02 p.m. UTC | #9
On Tue, Dec 12, 2023 at 09:54:38AM -0800, Reinette Chatre wrote:
> 
> On 12/8/2023 2:09 PM, Peter Newman wrote:
> > On Fri, Dec 8, 2023 at 1:57 PM Tony Luck <tony.luck@intel.com> wrote:
> >>
> >> On Fri, Dec 08, 2023 at 10:17:08AM -0800, Peter Newman wrote:
> >>> Hi Tony,
> >>>
> >>> On Thu, Dec 7, 2023 at 11:56 AM Tony Luck <tony.luck@intel.com> wrote:
> >>>> @@ -2715,7 +2723,25 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
> >>>>         case Opt_mba_mbps:
> >>>>                 if (!supports_mba_mbps())
> >>>>                         return -EINVAL;
> >>>> -               ctx->enable_mba_mbps = true;
> >>>> +               if (is_mbm_local_enabled())
> >>>> +                       ctx->enable_mba_mbps_local = true;
> >>>> +               else
> >>>> +                       return -EINVAL;
> >>>> +               return 0;
> >>>> +       case Opt_mba_mbps_event:
> >>>> +               if (!supports_mba_mbps())
> >>>> +                       return -EINVAL;
> >>>> +               if (!strcmp("mbm_local_bytes", param->string)) {
> >>>> +                       if (!is_mbm_local_enabled())
> >>>> +                               return -EINVAL;
> >>>> +                       ctx->enable_mba_mbps_local = true;
> >>>> +               } else if (!strcmp("mbm_total_bytes", param->string)) {
> >>>> +                       if (!is_mbm_total_enabled())
> >>>> +                               return -EINVAL;
> >>>> +                       ctx->enable_mba_mbps_total = true;
> >>>> +               } else {
> >>>> +                       return -EINVAL;
> >>>
> >>> It looks like if I pass
> >>> "mba_MBps_event=mbm_total_bytes,mba_MBps_event=mbm_local_bytes" I can
> >>> set both flags true.
> >>
> >> That's going to be confusing. I'll add code to stop the user from
> >> passing both options.
> > 
> > Also kind of confusing, after reading the second patch, I realized
> > "mba_MBps_event=mbm_total_bytes,mba_MBps" also results in both being
> > set. If you're able to fail the mount operation if both flags somehow
> > get set, that would address this one too.
> 
> Are two separate flags required? All existing options within struct rdt_fs_context
> are of type bool but that does not imply that it is the required type for
> all. 

Reinette,

Maybe a flag and a value?  The structure becomes:

struct rdt_fs_context {
	struct kernfs_fs_context	kfc;
	bool				enable_cdpl2;
	bool				enable_cdpl3;
	bool				enable_mba_mbps;
	enum resctrl_event_id		mba_mbps_event;
	bool				enable_debug;
};

Mount option parsing (including blocking user from setting the options
multiple times):

	case Opt_mba_mbps:
		if (!supports_mba_mbps() || ctx->enable_mba_mbps)
			return -EINVAL;
		if (is_mbm_local_enabled())
			ctx->mba_mbps_event = QOS_L3_MBM_LOCAL_EVENT_ID;
		else if (is_mbm_total_enabled())
			ctx->mba_mbps_event = QOS_L3_MBM_TOTAL_EVENT_ID;
		else
			return -EINVAL;
		ctx->enable_mba_mbps = true;
		return 0;
	case Opt_mba_mbps_event:
		if (!supports_mba_mbps() || ctx->enable_mba_mbps)
			return -EINVAL;
		if (!strcmp("mbm_local_bytes", param->string))
			ctx->mba_mbps_event = QOS_L3_MBM_LOCAL_EVENT_ID;
		else if (!strcmp("mbm_total_bytes", param->string))
			ctx->mba_mbps_event = QOS_L3_MBM_TOTAL_EVENT_ID;
		else
			return -EINVAL;
		ctx->enable_mba_mbps = true;
		return 0;


and use of the options to enable the feature:

	if (ctx->enable_mba_mbps) {
		r->membw.mba_mbps_event = ctx->mba_mbps_event;
		ret = set_mba_sc(true);
		if (ret)
			goto out_cdpl3;
	}

-Tony
  
Reinette Chatre Dec. 12, 2023, 9:42 p.m. UTC | #10
Hi Tony,

On 12/12/2023 12:02 PM, Tony Luck wrote:
> On Tue, Dec 12, 2023 at 09:54:38AM -0800, Reinette Chatre wrote:
>>
>> On 12/8/2023 2:09 PM, Peter Newman wrote:
>>> On Fri, Dec 8, 2023 at 1:57 PM Tony Luck <tony.luck@intel.com> wrote:
>>>>
>>>> On Fri, Dec 08, 2023 at 10:17:08AM -0800, Peter Newman wrote:
>>>>> Hi Tony,
>>>>>
>>>>> On Thu, Dec 7, 2023 at 11:56 AM Tony Luck <tony.luck@intel.com> wrote:
>>>>>> @@ -2715,7 +2723,25 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
>>>>>>         case Opt_mba_mbps:
>>>>>>                 if (!supports_mba_mbps())
>>>>>>                         return -EINVAL;
>>>>>> -               ctx->enable_mba_mbps = true;
>>>>>> +               if (is_mbm_local_enabled())
>>>>>> +                       ctx->enable_mba_mbps_local = true;
>>>>>> +               else
>>>>>> +                       return -EINVAL;
>>>>>> +               return 0;
>>>>>> +       case Opt_mba_mbps_event:
>>>>>> +               if (!supports_mba_mbps())
>>>>>> +                       return -EINVAL;
>>>>>> +               if (!strcmp("mbm_local_bytes", param->string)) {
>>>>>> +                       if (!is_mbm_local_enabled())
>>>>>> +                               return -EINVAL;
>>>>>> +                       ctx->enable_mba_mbps_local = true;
>>>>>> +               } else if (!strcmp("mbm_total_bytes", param->string)) {
>>>>>> +                       if (!is_mbm_total_enabled())
>>>>>> +                               return -EINVAL;
>>>>>> +                       ctx->enable_mba_mbps_total = true;
>>>>>> +               } else {
>>>>>> +                       return -EINVAL;
>>>>>
>>>>> It looks like if I pass
>>>>> "mba_MBps_event=mbm_total_bytes,mba_MBps_event=mbm_local_bytes" I can
>>>>> set both flags true.
>>>>
>>>> That's going to be confusing. I'll add code to stop the user from
>>>> passing both options.
>>>
>>> Also kind of confusing, after reading the second patch, I realized
>>> "mba_MBps_event=mbm_total_bytes,mba_MBps" also results in both being
>>> set. If you're able to fail the mount operation if both flags somehow
>>> get set, that would address this one too.
>>
>> Are two separate flags required? All existing options within struct rdt_fs_context
>> are of type bool but that does not imply that it is the required type for
>> all. 
> 
> Reinette,
> 
> Maybe a flag and a value?  The structure becomes:
> 
> struct rdt_fs_context {
> 	struct kernfs_fs_context	kfc;
> 	bool				enable_cdpl2;
> 	bool				enable_cdpl3;
> 	bool				enable_mba_mbps;
> 	enum resctrl_event_id		mba_mbps_event;
> 	bool				enable_debug;
> };

A flag and value would work. This brings the implementation close
to the resource properties. Something that is confusing to me with
this change is the inconsistent naming:

struct rdt_fs_context:
	bool			enable_mba_mbps
	enum resctrl event_id	mba_mbps_event

struct resctrl_membw:
	bool			mba_sc
	enum resctrl_event_id	mba_mbps_event


The intention with the above naming is not obvious to me. How are
these intended to be viewed?

One option could be to view these as separately representing user
space (struct rdt_fs_context) and kernel space (struct resctrl_membw).
If this is the case then the following naming may be more intuitive:

struct rdt_fs_context:
	bool			enable_mba_mbps
	enum resctrl event_id	mba_mbps_event

struct resctrl_membw:
	bool			mba_sc
	enum resctrl_event_id	mba_sc_event



> 
> Mount option parsing (including blocking user from setting the options
> multiple times):
> 
> 	case Opt_mba_mbps:
> 		if (!supports_mba_mbps() || ctx->enable_mba_mbps)
> 			return -EINVAL;

I am not familiar with the API but it seems that invalfc() is available
to communicate a more useful message to user space than the default one
shown in changelog of patch #2.

> 		if (is_mbm_local_enabled())
> 			ctx->mba_mbps_event = QOS_L3_MBM_LOCAL_EVENT_ID;
> 		else if (is_mbm_total_enabled())
> 			ctx->mba_mbps_event = QOS_L3_MBM_TOTAL_EVENT_ID;
> 		else
> 			return -EINVAL;
> 		ctx->enable_mba_mbps = true;
> 		return 0;
> 	case Opt_mba_mbps_event:
> 		if (!supports_mba_mbps() || ctx->enable_mba_mbps)
> 			return -EINVAL;
> 		if (!strcmp("mbm_local_bytes", param->string))
> 			ctx->mba_mbps_event = QOS_L3_MBM_LOCAL_EVENT_ID;
> 		else if (!strcmp("mbm_total_bytes", param->string))
> 			ctx->mba_mbps_event = QOS_L3_MBM_TOTAL_EVENT_ID;
> 		else
> 			return -EINVAL;
> 		ctx->enable_mba_mbps = true;
> 		return 0;
> 
> 
> and use of the options to enable the feature:
> 
> 	if (ctx->enable_mba_mbps) {
> 		r->membw.mba_mbps_event = ctx->mba_mbps_event;
> 		ret = set_mba_sc(true);
> 		if (ret)
> 			goto out_cdpl3;
> 	}

Since 0 will not be used for an unset/invalid value I expect mba_mbps_event
will not (cannot) be cleared by rdt_disable_ctx(). If this is the case I think
future changes can be supported by expanding the kerneldoc of struct resctrl_membw
to document that "@mba_mbps_event (or @mba_sc_event?) is invalid if @mba_sc
is false".

Reinette
  
Luck, Tony Dec. 13, 2023, 1:07 a.m. UTC | #11
>> 	case Opt_mba_mbps:
>> 		if (!supports_mba_mbps() || ctx->enable_mba_mbps)
>> 			return -EINVAL;
>
> I am not familiar with the API but it seems that invalfc() is available
> to communicate a more useful message to user space than the default one
> shown in changelog of patch #2.

I experimented with invalfc(). It seems to be the answer to this part of the
mount error message:

       dmesg(1) may have more information after failed mount system call.

because dmesg(1) does indeed include whatever message that is provided
by the invalfc() call (including automatically adding the prefix "resctrl: ").

I'll work on incorporating your other feedback, but I'm unlikely to get it
all done and tested before the end of this week. I'll be on vacation
the last two weeks of the year. So v7 of this series will have to wait
until 2024.

Thanks

-Tony
  

Patch

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 66942d7fba7f..1feb3b2e64fa 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -129,6 +129,7 @@  enum membw_throttle_mode {
  * @throttle_mode:	Bandwidth throttling mode when threads request
  *			different memory bandwidths
  * @mba_sc:		True if MBA software controller(mba_sc) is enabled
+ * @mba_mbps_event:	Event (local or total) for mba_sc
  * @mb_map:		Mapping of memory B/W percentage to memory B/W delay
  */
 struct resctrl_membw {
@@ -138,6 +139,7 @@  struct resctrl_membw {
 	bool				arch_needs_linear;
 	enum membw_throttle_mode	throttle_mode;
 	bool				mba_sc;
+	enum resctrl_event_id		mba_mbps_event;
 	u32				*mb_map;
 };
 
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index a4f1aa15f0a2..8b9b8f664324 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -58,7 +58,8 @@  struct rdt_fs_context {
 	struct kernfs_fs_context	kfc;
 	bool				enable_cdpl2;
 	bool				enable_cdpl3;
-	bool				enable_mba_mbps;
+	bool				enable_mba_mbps_local;
+	bool				enable_mba_mbps_total;
 	bool				enable_debug;
 };
 
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index f136ac046851..d9e590f1cbc3 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -431,9 +431,10 @@  static int __mon_event_count(u32 rmid, struct rmid_read *rr)
  */
 static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
 {
-	struct mbm_state *m = &rr->d->mbm_local[rmid];
 	u64 cur_bw, bytes, cur_bytes;
+	struct mbm_state *m;
 
+	m = get_mbm_state(rr->d, rmid, rr->evtid);
 	cur_bytes = rr->val;
 	bytes = cur_bytes - m->prev_bw_bytes;
 	m->prev_bw_bytes = cur_bytes;
@@ -521,19 +522,21 @@  static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
 	u32 closid, rmid, cur_msr_val, new_msr_val;
 	struct mbm_state *pmbm_data, *cmbm_data;
 	u32 cur_bw, delta_bw, user_bw;
+	enum resctrl_event_id evt_id;
 	struct rdt_resource *r_mba;
 	struct rdt_domain *dom_mba;
 	struct list_head *head;
 	struct rdtgroup *entry;
 
-	if (!is_mbm_local_enabled())
+	if (!is_mbm_enabled())
 		return;
 
 	r_mba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
+	evt_id = r_mba->membw.mba_mbps_event;
 
 	closid = rgrp->closid;
 	rmid = rgrp->mon.rmid;
-	pmbm_data = &dom_mbm->mbm_local[rmid];
+	pmbm_data = get_mbm_state(dom_mbm, rmid, evt_id);
 
 	dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba);
 	if (!dom_mba) {
@@ -553,7 +556,7 @@  static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
 	 */
 	head = &rgrp->mon.crdtgrp_list;
 	list_for_each_entry(entry, head, mon.crdtgrp_list) {
-		cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
+		cmbm_data = get_mbm_state(dom_mbm, entry->mon.rmid, evt_id);
 		cur_bw += cmbm_data->prev_bw;
 		delta_bw += cmbm_data->delta_bw;
 	}
@@ -616,18 +619,14 @@  static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
 		rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID;
 		rr.val = 0;
 		__mon_event_count(rmid, &rr);
+		if (is_mba_sc(NULL) && rr.evtid == r->membw.mba_mbps_event)
+			mbm_bw_count(rmid, &rr);
 	}
 	if (is_mbm_local_enabled()) {
 		rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
 		rr.val = 0;
 		__mon_event_count(rmid, &rr);
-
-		/*
-		 * Call the MBA software controller only for the
-		 * control groups and when user has enabled
-		 * the software controller explicitly.
-		 */
-		if (is_mba_sc(NULL))
+		if (is_mba_sc(NULL) && rr.evtid == r->membw.mba_mbps_event)
 			mbm_bw_count(rmid, &rr);
 	}
 }
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 69a1de92384a..5f64a0b2597c 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2294,7 +2294,7 @@  static bool supports_mba_mbps(void)
 {
 	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
 
-	return (is_mbm_local_enabled() &&
+	return (is_mbm_enabled() &&
 		r->alloc_capable && is_mba_linear());
 }
 
@@ -2302,7 +2302,7 @@  static bool supports_mba_mbps(void)
  * Enable or disable the MBA software controller
  * which helps user specify bandwidth in MBps.
  */
-static int set_mba_sc(bool mba_sc)
+static int set_mba_sc(bool mba_sc, enum resctrl_event_id mba_mbps_event)
 {
 	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
 	u32 num_closid = resctrl_arch_get_num_closid(r);
@@ -2313,6 +2313,7 @@  static int set_mba_sc(bool mba_sc)
 		return -EINVAL;
 
 	r->membw.mba_sc = mba_sc;
+	r->membw.mba_mbps_event = mba_mbps_event;
 
 	list_for_each_entry(d, &r->domains, list) {
 		for (i = 0; i < num_closid; i++)
@@ -2445,13 +2446,14 @@  static void rdt_disable_ctx(void)
 {
 	resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
 	resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
-	set_mba_sc(false);
+	set_mba_sc(false, QOS_L3_MBM_LOCAL_EVENT_ID);
 
 	resctrl_debug = false;
 }
 
 static int rdt_enable_ctx(struct rdt_fs_context *ctx)
 {
+	enum resctrl_event_id mba_mbps_event;
 	int ret = 0;
 
 	if (ctx->enable_cdpl2) {
@@ -2466,8 +2468,12 @@  static int rdt_enable_ctx(struct rdt_fs_context *ctx)
 			goto out_cdpl2;
 	}
 
-	if (ctx->enable_mba_mbps) {
-		ret = set_mba_sc(true);
+	if (ctx->enable_mba_mbps_local || ctx->enable_mba_mbps_total) {
+		if (ctx->enable_mba_mbps_total)
+			mba_mbps_event = QOS_L3_MBM_TOTAL_EVENT_ID;
+		else
+			mba_mbps_event = QOS_L3_MBM_LOCAL_EVENT_ID;
+		ret = set_mba_sc(true, mba_mbps_event);
 		if (ret)
 			goto out_cdpl3;
 	}
@@ -2683,15 +2689,17 @@  enum rdt_param {
 	Opt_cdp,
 	Opt_cdpl2,
 	Opt_mba_mbps,
+	Opt_mba_mbps_event,
 	Opt_debug,
 	nr__rdt_params
 };
 
 static const struct fs_parameter_spec rdt_fs_parameters[] = {
-	fsparam_flag("cdp",		Opt_cdp),
-	fsparam_flag("cdpl2",		Opt_cdpl2),
-	fsparam_flag("mba_MBps",	Opt_mba_mbps),
-	fsparam_flag("debug",		Opt_debug),
+	fsparam_flag("cdp",			Opt_cdp),
+	fsparam_flag("cdpl2",			Opt_cdpl2),
+	fsparam_flag("mba_MBps",		Opt_mba_mbps),
+	fsparam_string("mba_MBps_event",	Opt_mba_mbps_event),
+	fsparam_flag("debug",			Opt_debug),
 	{}
 };
 
@@ -2715,7 +2723,25 @@  static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
 	case Opt_mba_mbps:
 		if (!supports_mba_mbps())
 			return -EINVAL;
-		ctx->enable_mba_mbps = true;
+		if (is_mbm_local_enabled())
+			ctx->enable_mba_mbps_local = true;
+		else
+			return -EINVAL;
+		return 0;
+	case Opt_mba_mbps_event:
+		if (!supports_mba_mbps())
+			return -EINVAL;
+		if (!strcmp("mbm_local_bytes", param->string)) {
+			if (!is_mbm_local_enabled())
+				return -EINVAL;
+			ctx->enable_mba_mbps_local = true;
+		} else if (!strcmp("mbm_total_bytes", param->string)) {
+			if (!is_mbm_total_enabled())
+				return -EINVAL;
+			ctx->enable_mba_mbps_total = true;
+		} else {
+			return -EINVAL;
+		}
 		return 0;
 	case Opt_debug:
 		ctx->enable_debug = true;
@@ -3780,16 +3806,27 @@  static int rdtgroup_rename(struct kernfs_node *kn,
 	return ret;
 }
 
+static char *mba_sc_event_opt_name(struct rdt_resource *r)
+{
+	if (r->membw.mba_mbps_event == QOS_L3_MBM_LOCAL_EVENT_ID)
+		return ",mba_MBps_event=mbm_local_bytes";
+	else if (r->membw.mba_mbps_event == QOS_L3_MBM_TOTAL_EVENT_ID)
+		return ",mba_MBps_event=mbm_total_bytes";
+	return "";
+}
+
 static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
 {
+	struct rdt_resource *r_mba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
+
 	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
 		seq_puts(seq, ",cdp");
 
 	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
 		seq_puts(seq, ",cdpl2");
 
-	if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl))
-		seq_puts(seq, ",mba_MBps");
+	if (is_mba_sc(r_mba))
+		seq_puts(seq, mba_sc_event_opt_name(r_mba));
 
 	if (resctrl_debug)
 		seq_puts(seq, ",debug");