[v2,net-next,2/2] net: introduce budget_squeeze to help us tune rx behavior

Message ID 20230314030532.9238-3-kerneljasonxing@gmail.com
State New
Headers
Series add some detailed data when reading softnet_stat |

Commit Message

Jason Xing March 14, 2023, 3:05 a.m. UTC
  From: Jason Xing <kernelxing@tencent.com>

When we encounter a performance issue and cannot tell whether to tune
the budget limit or the time limit in the net_rx_action() function,
counting the two squeeze conditions separately avoids the confusion.

Signed-off-by: Jason Xing <kernelxing@tencent.com>
---
v2:
1) Change the coding style, as suggested by Stephen and Simon
2) Keep the display of the old data (time_squeeze) untouched, as
suggested by Kui-Feng
Link: https://lore.kernel.org/lkml/20230311163614.92296-1-kerneljasonxing@gmail.com/
---
 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 12 ++++++++----
 net/core/net-procfs.c     |  9 ++++++---
 3 files changed, 15 insertions(+), 7 deletions(-)
  

Comments

Simon Horman March 14, 2023, 12:03 p.m. UTC | #1
On Tue, Mar 14, 2023 at 11:05:32AM +0800, Jason Xing wrote:
> From: Jason Xing <kernelxing@tencent.com>
> 
> When we encounter some performance issue and then get lost on how
> to tune the budget limit and time limit in net_rx_action() function,
> we can separately counting both of them to avoid the confusion.
> 
> Signed-off-by: Jason Xing <kernelxing@tencent.com>

As per my comment on patch 1/2, I'd drop the "/* keep it untouched */"
comment.

That notwithstanding:

Reviewed-by: Simon Horman <simon.horman@corigine.com>

> ---
> v2:
> 1) change the coding style suggested by Stephen and Simon
> 2) Keep the display of the old data (time_squeeze) untouched suggested
> by Kui-Feng
> Link: https://lore.kernel.org/lkml/20230311163614.92296-1-kerneljasonxing@gmail.com/
> ---
>  include/linux/netdevice.h |  1 +
>  net/core/dev.c            | 12 ++++++++----
>  net/core/net-procfs.c     |  9 ++++++---
>  3 files changed, 15 insertions(+), 7 deletions(-)
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 6a14b7b11766..5736311a2133 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -3157,6 +3157,7 @@ struct softnet_data {
>  	/* stats */
>  	unsigned int		processed;
>  	unsigned int		time_squeeze;
> +	unsigned int		budget_squeeze;
>  #ifdef CONFIG_RPS
>  	struct softnet_data	*rps_ipi_list;
>  #endif
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 253584777101..1518a366783b 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -6637,6 +6637,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
>  	unsigned long time_limit = jiffies +
>  		usecs_to_jiffies(READ_ONCE(netdev_budget_usecs));
>  	int budget = READ_ONCE(netdev_budget);
> +	bool done = false;
>  	LIST_HEAD(list);
>  	LIST_HEAD(repoll);
>  
> @@ -6644,7 +6645,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
>  	list_splice_init(&sd->poll_list, &list);
>  	local_irq_enable();
>  
> -	for (;;) {
> +	while (!done) {
>  		struct napi_struct *n;
>  
>  		skb_defer_free_flush(sd);
> @@ -6662,10 +6663,13 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
>  		 * Allow this to run for 2 jiffies since which will allow
>  		 * an average latency of 1.5/HZ.
>  		 */
> -		if (unlikely(budget <= 0 ||
> -			     time_after_eq(jiffies, time_limit))) {
> +		if (unlikely(budget <= 0)) {
> +			sd->budget_squeeze++;
> +			done = true;
> +		}
> +		if (unlikely(time_after_eq(jiffies, time_limit))) {
>  			sd->time_squeeze++;
> -			break;
> +			done = true;
>  		}
>  	}
>  
> diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
> index 2809b663e78d..25810ee46a04 100644
> --- a/net/core/net-procfs.c
> +++ b/net/core/net-procfs.c
> @@ -179,14 +179,17 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
>  	 */
>  	seq_printf(seq,
>  		   "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x "
> -		   "%08x %08x\n",
> -		   sd->processed, sd->dropped, sd->time_squeeze, 0,
> +		   "%08x %08x %08x %08x\n",
> +		   sd->processed, sd->dropped,
> +		   sd->time_squeeze + sd->budget_squeeze, /* keep it untouched */
> +		   0,
>  		   0, 0, 0, 0, /* was fastroute */
>  		   0,	/* was cpu_collision */
>  		   sd->received_rps, flow_limit_count,
>  		   softnet_backlog_len(sd),	/* keep it untouched */
>  		   (int)seq->index,
> -		   softnet_input_pkt_queue_len(sd), softnet_process_queue_len(sd));
> +		   softnet_input_pkt_queue_len(sd), softnet_process_queue_len(sd),
> +		   sd->time_squeeze, sd->budget_squeeze);
>  	return 0;
>  }
>  
> -- 
> 2.37.3
>
  
Jason Xing March 14, 2023, 12:31 p.m. UTC | #2
On Tue, Mar 14, 2023 at 8:04 PM Simon Horman <simon.horman@corigine.com> wrote:
>
> On Tue, Mar 14, 2023 at 11:05:32AM +0800, Jason Xing wrote:
> > From: Jason Xing <kernelxing@tencent.com>
> >
> > When we encounter some performance issue and then get lost on how
> > to tune the budget limit and time limit in net_rx_action() function,
> > we can separately counting both of them to avoid the confusion.
> >
> > Signed-off-by: Jason Xing <kernelxing@tencent.com>
>
> As per my comment on patch 1/2, I'd drop the "/* keep it untouched */"
> comment.

I think you're right. I'll drop this.

Thanks,
Jason

>
> That notwithstanding:
>
> Reviewed-by: Simon Horman <simon.horman@corigine.com>
>
> > ---
> > v2:
> > 1) change the coding style suggested by Stephen and Simon
> > 2) Keep the display of the old data (time_squeeze) untouched suggested
> > by Kui-Feng
> > Link: https://lore.kernel.org/lkml/20230311163614.92296-1-kerneljasonxing@gmail.com/
> > ---
> >  include/linux/netdevice.h |  1 +
> >  net/core/dev.c            | 12 ++++++++----
> >  net/core/net-procfs.c     |  9 ++++++---
> >  3 files changed, 15 insertions(+), 7 deletions(-)
> >
> > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> > index 6a14b7b11766..5736311a2133 100644
> > --- a/include/linux/netdevice.h
> > +++ b/include/linux/netdevice.h
> > @@ -3157,6 +3157,7 @@ struct softnet_data {
> >       /* stats */
> >       unsigned int            processed;
> >       unsigned int            time_squeeze;
> > +     unsigned int            budget_squeeze;
> >  #ifdef CONFIG_RPS
> >       struct softnet_data     *rps_ipi_list;
> >  #endif
> > diff --git a/net/core/dev.c b/net/core/dev.c
> > index 253584777101..1518a366783b 100644
> > --- a/net/core/dev.c
> > +++ b/net/core/dev.c
> > @@ -6637,6 +6637,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
> >       unsigned long time_limit = jiffies +
> >               usecs_to_jiffies(READ_ONCE(netdev_budget_usecs));
> >       int budget = READ_ONCE(netdev_budget);
> > +     bool done = false;
> >       LIST_HEAD(list);
> >       LIST_HEAD(repoll);
> >
> > @@ -6644,7 +6645,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
> >       list_splice_init(&sd->poll_list, &list);
> >       local_irq_enable();
> >
> > -     for (;;) {
> > +     while (!done) {
> >               struct napi_struct *n;
> >
> >               skb_defer_free_flush(sd);
> > @@ -6662,10 +6663,13 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
> >                * Allow this to run for 2 jiffies since which will allow
> >                * an average latency of 1.5/HZ.
> >                */
> > -             if (unlikely(budget <= 0 ||
> > -                          time_after_eq(jiffies, time_limit))) {
> > +             if (unlikely(budget <= 0)) {
> > +                     sd->budget_squeeze++;
> > +                     done = true;
> > +             }
> > +             if (unlikely(time_after_eq(jiffies, time_limit))) {
> >                       sd->time_squeeze++;
> > -                     break;
> > +                     done = true;
> >               }
> >       }
> >
> > diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
> > index 2809b663e78d..25810ee46a04 100644
> > --- a/net/core/net-procfs.c
> > +++ b/net/core/net-procfs.c
> > @@ -179,14 +179,17 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
> >        */
> >       seq_printf(seq,
> >                  "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x "
> > -                "%08x %08x\n",
> > -                sd->processed, sd->dropped, sd->time_squeeze, 0,
> > +                "%08x %08x %08x %08x\n",
> > +                sd->processed, sd->dropped,
> > +                sd->time_squeeze + sd->budget_squeeze, /* keep it untouched */
> > +                0,
> >                  0, 0, 0, 0, /* was fastroute */
> >                  0,   /* was cpu_collision */
> >                  sd->received_rps, flow_limit_count,
> >                  softnet_backlog_len(sd),     /* keep it untouched */
> >                  (int)seq->index,
> > -                softnet_input_pkt_queue_len(sd), softnet_process_queue_len(sd));
> > +                softnet_input_pkt_queue_len(sd), softnet_process_queue_len(sd),
> > +                sd->time_squeeze, sd->budget_squeeze);
> >       return 0;
> >  }
> >
> > --
> > 2.37.3
> >
  

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6a14b7b11766..5736311a2133 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3157,6 +3157,7 @@  struct softnet_data {
 	/* stats */
 	unsigned int		processed;
 	unsigned int		time_squeeze;
+	unsigned int		budget_squeeze;
 #ifdef CONFIG_RPS
 	struct softnet_data	*rps_ipi_list;
 #endif
diff --git a/net/core/dev.c b/net/core/dev.c
index 253584777101..1518a366783b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6637,6 +6637,7 @@  static __latent_entropy void net_rx_action(struct softirq_action *h)
 	unsigned long time_limit = jiffies +
 		usecs_to_jiffies(READ_ONCE(netdev_budget_usecs));
 	int budget = READ_ONCE(netdev_budget);
+	bool done = false;
 	LIST_HEAD(list);
 	LIST_HEAD(repoll);
 
@@ -6644,7 +6645,7 @@  static __latent_entropy void net_rx_action(struct softirq_action *h)
 	list_splice_init(&sd->poll_list, &list);
 	local_irq_enable();
 
-	for (;;) {
+	while (!done) {
 		struct napi_struct *n;
 
 		skb_defer_free_flush(sd);
@@ -6662,10 +6663,13 @@  static __latent_entropy void net_rx_action(struct softirq_action *h)
 		 * Allow this to run for 2 jiffies since which will allow
 		 * an average latency of 1.5/HZ.
 		 */
-		if (unlikely(budget <= 0 ||
-			     time_after_eq(jiffies, time_limit))) {
+		if (unlikely(budget <= 0)) {
+			sd->budget_squeeze++;
+			done = true;
+		}
+		if (unlikely(time_after_eq(jiffies, time_limit))) {
 			sd->time_squeeze++;
-			break;
+			done = true;
 		}
 	}
 
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 2809b663e78d..25810ee46a04 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -179,14 +179,17 @@  static int softnet_seq_show(struct seq_file *seq, void *v)
 	 */
 	seq_printf(seq,
 		   "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x "
-		   "%08x %08x\n",
-		   sd->processed, sd->dropped, sd->time_squeeze, 0,
+		   "%08x %08x %08x %08x\n",
+		   sd->processed, sd->dropped,
+		   sd->time_squeeze + sd->budget_squeeze, /* keep it untouched */
+		   0,
 		   0, 0, 0, 0, /* was fastroute */
 		   0,	/* was cpu_collision */
 		   sd->received_rps, flow_limit_count,
 		   softnet_backlog_len(sd),	/* keep it untouched */
 		   (int)seq->index,
-		   softnet_input_pkt_queue_len(sd), softnet_process_queue_len(sd));
+		   softnet_input_pkt_queue_len(sd), softnet_process_queue_len(sd),
+		   sd->time_squeeze, sd->budget_squeeze);
 	return 0;
 }