[4/4] mm/vmstat: do not refresh stats for nohz_full CPUs

Message ID 20230530145336.155097348@redhat.com
State New
Headers
Series vmstat bug fixes for nohz_full CPUs |

Commit Message

Marcelo Tosatti May 30, 2023, 2:52 p.m. UTC
  The interruption caused by queueing work on nohz_full CPUs 
is undesirable for certain applications.

Fix by not refreshing per-CPU stats of nohz_full CPUs. 

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

---
  

Comments

Michal Hocko June 2, 2023, 10:50 a.m. UTC | #1
On Tue 30-05-23 11:52:38, Marcelo Tosatti wrote:
> The interruption caused by queueing work on nohz_full CPUs 
> is undesirable for certain applications.
> 
> Fix by not refreshing per-CPU stats of nohz_full CPUs. 
> 
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> 
> ---
> 
> Index: linux-vmstat-remote/mm/vmstat.c
> ===================================================================
> --- linux-vmstat-remote.orig/mm/vmstat.c
> +++ linux-vmstat-remote/mm/vmstat.c
> @@ -1877,12 +1877,31 @@ static void refresh_vm_stats(struct work
>  	refresh_cpu_vm_stats(true);
>  }
>  
> +#ifdef CONFIG_NO_HZ_FULL
> +static inline const cpumask_t *tickless_cpumask(void)
> +{
> +	return tick_nohz_full_mask;
> +}
> +#else
> +static cpumask_t empty_cpumask;
> +static inline const cpumask_t *tickless_cpumask(void)
> +{
> +	return &empty_cpumask;
> +}
> +#endif
> +
>  int vmstat_refresh(struct ctl_table *table, int write,
>  		   void *buffer, size_t *lenp, loff_t *ppos)
>  {
>  	long val;
>  	int err;
>  	int i;
> +	cpumask_var_t dstmask;
> +
> +	if (!alloc_cpumask_var(&dstmask, GFP_KERNEL))
> +		return -ENOMEM;
> +
> +	cpumask_andnot(dstmask, cpu_possible_mask, tickless_cpumask());

Similar feedback to the patch 2. wouldn't it make more sense to opencode
schedule_on_each_cpu here and use cpu_is_isolated instead?

>  
>  	/*
>  	 * The regular update, every sysctl_stat_interval, may come later
> @@ -1896,7 +1915,9 @@ int vmstat_refresh(struct ctl_table *tab
>  	 * transiently negative values, report an error here if any of
>  	 * the stats is negative, so we know to go looking for imbalance.
>  	 */
> -	err = schedule_on_each_cpu(refresh_vm_stats);
> +	err = schedule_on_each_cpumask(refresh_vm_stats, dstmask);
> +	free_cpumask_var(dstmask);
> +
>  	if (err)
>  		return err;
>  	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
>
  

Patch

Index: linux-vmstat-remote/mm/vmstat.c
===================================================================
--- linux-vmstat-remote.orig/mm/vmstat.c
+++ linux-vmstat-remote/mm/vmstat.c
@@ -1877,12 +1877,31 @@  static void refresh_vm_stats(struct work
 	refresh_cpu_vm_stats(true);
 }
 
+#ifdef CONFIG_NO_HZ_FULL
+static inline const cpumask_t *tickless_cpumask(void)
+{
+	return tick_nohz_full_mask;
+}
+#else
+static cpumask_t empty_cpumask;
+static inline const cpumask_t *tickless_cpumask(void)
+{
+	return &empty_cpumask;
+}
+#endif
+
 int vmstat_refresh(struct ctl_table *table, int write,
 		   void *buffer, size_t *lenp, loff_t *ppos)
 {
 	long val;
 	int err;
 	int i;
+	cpumask_var_t dstmask;
+
+	if (!alloc_cpumask_var(&dstmask, GFP_KERNEL))
+		return -ENOMEM;
+
+	cpumask_andnot(dstmask, cpu_possible_mask, tickless_cpumask());
 
 	/*
 	 * The regular update, every sysctl_stat_interval, may come later
@@ -1896,7 +1915,9 @@  int vmstat_refresh(struct ctl_table *tab
 	 * transiently negative values, report an error here if any of
 	 * the stats is negative, so we know to go looking for imbalance.
 	 */
-	err = schedule_on_each_cpu(refresh_vm_stats);
+	err = schedule_on_each_cpumask(refresh_vm_stats, dstmask);
+	free_cpumask_var(dstmask);
+
 	if (err)
 		return err;
 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {