[4/4] mm/vmstat: do not refresh stats for nohz_full CPUs

Message ID 20230530145336.155097348@redhat.com
State New
Headers
Series vmstat bug fixes for nohz_full CPUs |

Commit Message

Marcelo Tosatti May 30, 2023, 2:52 p.m. UTC
  The interruption caused by queueing work on nohz_full CPUs 
is undesirable for certain applications.

Fix by not refreshing per-CPU stats of nohz_full CPUs. 

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

---
  

Comments

Michal Hocko June 2, 2023, 10:50 a.m. UTC | #1
On Tue 30-05-23 11:52:38, Marcelo Tosatti wrote:
> The interruption caused by queueing work on nohz_full CPUs 
> is undesirable for certain applications.
> 
> Fix by not refreshing per-CPU stats of nohz_full CPUs. 
> 
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> 
> ---
> 
> Index: linux-vmstat-remote/mm/vmstat.c
> ===================================================================
> --- linux-vmstat-remote.orig/mm/vmstat.c
> +++ linux-vmstat-remote/mm/vmstat.c
> @@ -1877,12 +1877,31 @@ static void refresh_vm_stats(struct work
>  	refresh_cpu_vm_stats(true);
>  }
>  
> +#ifdef CONFIG_NO_HZ_FULL
> +static inline const cpumask_t *tickless_cpumask(void)
> +{
> +	return tick_nohz_full_mask;
> +}
> +#else
> +static cpumask_t empty_cpumask;
> +static inline const cpumask_t *tickless_cpumask(void)
> +{
> +	return &empty_cpumask;
> +}
> +#endif
> +
>  int vmstat_refresh(struct ctl_table *table, int write,
>  		   void *buffer, size_t *lenp, loff_t *ppos)
>  {
>  	long val;
>  	int err;
>  	int i;
> +	cpumask_var_t dstmask;
> +
> +	if (!alloc_cpumask_var(&dstmask, GFP_KERNEL))
> +		return -ENOMEM;
> +
> +	cpumask_andnot(dstmask, cpu_possible_mask, tickless_cpumask());

Similar feedback to the patch 2. wouldn't it make more sense to opencode
schedule_on_each_cpu here and use cpu_is_isolated instead?

>  
>  	/*
>  	 * The regular update, every sysctl_stat_interval, may come later
> @@ -1896,7 +1915,9 @@ int vmstat_refresh(struct ctl_table *tab
>  	 * transiently negative values, report an error here if any of
>  	 * the stats is negative, so we know to go looking for imbalance.
>  	 */
> -	err = schedule_on_each_cpu(refresh_vm_stats);
> +	err = schedule_on_each_cpumask(refresh_vm_stats, dstmask);
> +	free_cpumask_var(dstmask);
> +
>  	if (err)
>  		return err;
>  	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
>
  

Patch

Index: linux-vmstat-remote/mm/vmstat.c
===================================================================
--- linux-vmstat-remote.orig/mm/vmstat.c
+++ linux-vmstat-remote/mm/vmstat.c
@@ -1877,12 +1877,31 @@  static void refresh_vm_stats(struct work
 	refresh_cpu_vm_stats(true);
 }
 
+#ifdef CONFIG_NO_HZ_FULL
+static inline const cpumask_t *tickless_cpumask(void)
+{
+	return tick_nohz_full_mask;
+}
+#else
+static cpumask_t empty_cpumask;
+static inline const cpumask_t *tickless_cpumask(void)
+{
+	return &empty_cpumask;
+}
+#endif
+
 int vmstat_refresh(struct ctl_table *table, int write,
 		   void *buffer, size_t *lenp, loff_t *ppos)
 {
 	long val;
 	int err;
 	int i;
+	cpumask_var_t dstmask;
+
+	if (!alloc_cpumask_var(&dstmask, GFP_KERNEL))
+		return -ENOMEM;
+
+	cpumask_andnot(dstmask, cpu_possible_mask, tickless_cpumask());
 
 	/*
 	 * The regular update, every sysctl_stat_interval, may come later
@@ -1896,7 +1915,9 @@  int vmstat_refresh(struct ctl_table *tab
 	 * transiently negative values, report an error here if any of
 	 * the stats is negative, so we know to go looking for imbalance.
 	 */
-	err = schedule_on_each_cpu(refresh_vm_stats);
+	err = schedule_on_each_cpumask(refresh_vm_stats, dstmask);
+	free_cpumask_var(dstmask);
+
 	if (err)
 		return err;
 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {