[net] net: fec: tx processing does not call XDP APIs if budget is 0

Message ID 20230725074148.2936402-1-wei.fang@nxp.com
State New
Headers
Series [net] net: fec: tx processing does not call XDP APIs if budget is 0 |

Commit Message

Wei Fang July 25, 2023, 7:41 a.m. UTC
  According to the clarification [1] in the latest napi.rst, the tx
processing cannot call any XDP (or page pool) APIs if the "budget"
is 0. When NAPI is called with a budget of 0 (such as by netpoll),
we may be in an IRQ context, and the page pool cannot be used from
IRQ context.

[1] https://lore.kernel.org/all/20230720161323.2025379-1-kuba@kernel.org/

Fixes: 20f797399035 ("net: fec: recycle pages for transmitted XDP frames")
Signed-off-by: Wei Fang <wei.fang@nxp.com>
Suggested-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/freescale/fec_main.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)
  

Comments

Alexander Duyck July 25, 2023, 4:51 p.m. UTC | #1
On Tue, 2023-07-25 at 15:41 +0800, Wei Fang wrote:
> According to the clarification [1] in the latest napi.rst, the tx
> processing cannot call any XDP (or page pool) APIs if the "budget"
> is 0. Because NAPI is called with the budget of 0 (such as netpoll)
> indicates we may be in an IRQ context, however, we cannot use the
> page pool from IRQ context.
> 
> [1] https://lore.kernel.org/all/20230720161323.2025379-1-kuba@kernel.org/
> 
> Fixes: 20f797399035 ("net: fec: recycle pages for transmitted XDP frames")
> Signed-off-by: Wei Fang <wei.fang@nxp.com>
> Suggested-by: Jakub Kicinski <kuba@kernel.org>
> ---
>  drivers/net/ethernet/freescale/fec_main.c | 16 ++++++++++++----
>  1 file changed, 12 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
> index 073d61619336..66b5cbdb43b9 100644
> --- a/drivers/net/ethernet/freescale/fec_main.c
> +++ b/drivers/net/ethernet/freescale/fec_main.c
> @@ -1372,7 +1372,7 @@ fec_enet_hwtstamp(struct fec_enet_private *fep, unsigned ts,
>  }
>  
>  static void
> -fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
> +fec_enet_tx_queue(struct net_device *ndev, u16 queue_id, int budget)
>  {
>  	struct	fec_enet_private *fep;
>  	struct xdp_frame *xdpf;
> @@ -1416,6 +1416,14 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
>  			if (!skb)
>  				goto tx_buf_done;
>  		} else {
> +			/* Tx processing cannot call any XDP (or page pool) APIs if
> +			 * the "budget" is 0. Because NAPI is called with budget of
> +			 * 0 (such as netpoll) indicates we may be in an IRQ context,
> +			 * however, we can't use the page pool from IRQ context.
> +			 */
> +			if (unlikely(!budget))
> +				break;
> +
>  			xdpf = txq->tx_buf[index].xdp;
>  			if (bdp->cbd_bufaddr)
>  				dma_unmap_single(&fep->pdev->dev,

This statement isn't correct. There are napi enabled and non-napi
versions of these calls. This is the reason for things like the
"allow_direct" parameter in page_pool_put_full_page and the
"napi_direct" parameter in __xdp_return.

By blocking on these cases you can end up hanging the Tx queue which is
going to break netpoll as you are going to stall the ring on XDP
packets if they are already in the queue.

From what I can tell, your driver is using xdp_return_frame in the case
of an XDP frame, which doesn't make use of the NAPI optimizations in
freeing. The NAPI optimized version is xdp_return_frame_rx_napi.

> @@ -1508,14 +1516,14 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
>  		writel(0, txq->bd.reg_desc_active);
>  }
>  
> -static void fec_enet_tx(struct net_device *ndev)
> +static void fec_enet_tx(struct net_device *ndev, int budget)
>  {
>  	struct fec_enet_private *fep = netdev_priv(ndev);
>  	int i;
>  
>  	/* Make sure that AVB queues are processed first. */
>  	for (i = fep->num_tx_queues - 1; i >= 0; i--)
> -		fec_enet_tx_queue(ndev, i);
> +		fec_enet_tx_queue(ndev, i, budget);
>  }
>  
>  static void fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq,
> @@ -1858,7 +1866,7 @@ static int fec_enet_rx_napi(struct napi_struct *napi, int budget)
>  
>  	do {
>  		done += fec_enet_rx(ndev, budget - done);
> -		fec_enet_tx(ndev);
> +		fec_enet_tx(ndev, budget);
>  	} while ((done < budget) && fec_enet_collect_events(fep));
>  
>  	if (done < budget) {

Since you are passing budget, one optimization you could make use of
would be napi_consume_skb in your Tx path instead of dev_kfree_skb_any.
  
Wei Fang July 26, 2023, 3:40 a.m. UTC | #2
Hi Alexander,

> > @@ -1416,6 +1416,14 @@ fec_enet_tx_queue(struct net_device *ndev,
> u16 queue_id)
> >  			if (!skb)
> >  				goto tx_buf_done;
> >  		} else {
> > +			/* Tx processing cannot call any XDP (or page pool) APIs if
> > +			 * the "budget" is 0. Because NAPI is called with budget of
> > +			 * 0 (such as netpoll) indicates we may be in an IRQ context,
> > +			 * however, we can't use the page pool from IRQ context.
> > +			 */
> > +			if (unlikely(!budget))
> > +				break;
> > +
> >  			xdpf = txq->tx_buf[index].xdp;
> >  			if (bdp->cbd_bufaddr)
> >  				dma_unmap_single(&fep->pdev->dev,
> 
> This statement isn't correct. There are napi enabled and non-napi
> versions of these calls. This is the reason for things like the
> "allow_direct" parameter in page_pool_put_full_page and the
> "napi_direct" parameter in __xdp_return.
> 
> By blocking on these cases you can end up hanging the Tx queue which is
> going to break netpoll as you are going to stall the ring on XDP
> packets if they are already in the queue.
> 
> From what I can tell your driver is using xdp_return_frame in the case
> of an XDP frame which doesn't make use of the NAPI optimizations in
> freeing from what I can tell. The NAPI optimized version is
> xdp_return_frame_rx.
> 
So you mean it is safe to use xdp_return_frame in both NAPI and non-NAPI
context? And xdp_return_frame_rx_napi must be used in NAPI context?
If so, I think I must have misunderstood, and this patch is not necessary.

> > @@ -1508,14 +1516,14 @@ fec_enet_tx_queue(struct net_device *ndev,
> u16 queue_id)
> >  		writel(0, txq->bd.reg_desc_active);
> >  }
> >
> > -static void fec_enet_tx(struct net_device *ndev)
> > +static void fec_enet_tx(struct net_device *ndev, int budget)
> >  {
> >  	struct fec_enet_private *fep = netdev_priv(ndev);
> >  	int i;
> >
> >  	/* Make sure that AVB queues are processed first. */
> >  	for (i = fep->num_tx_queues - 1; i >= 0; i--)
> > -		fec_enet_tx_queue(ndev, i);
> > +		fec_enet_tx_queue(ndev, i, budget);
> >  }
> >
> >  static void fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq,
> > @@ -1858,7 +1866,7 @@ static int fec_enet_rx_napi(struct napi_struct
> *napi, int budget)
> >
> >  	do {
> >  		done += fec_enet_rx(ndev, budget - done);
> > -		fec_enet_tx(ndev);
> > +		fec_enet_tx(ndev, budget);
> >  	} while ((done < budget) && fec_enet_collect_events(fep));
> >
> >  	if (done < budget) {
> 
> Since you are passing budget, one optimization you could make use of
> would be napi_consume_skb in your Tx path instead of dev_kfree_skb_any.
That's a good suggestion. I think I can add this optimization in my XDP_TX
support patch. Thanks!
  
Alexander Duyck July 26, 2023, 3:53 p.m. UTC | #3
On Tue, Jul 25, 2023 at 8:40 PM Wei Fang <wei.fang@nxp.com> wrote:
>
> Hi Alexander,
>
> > > @@ -1416,6 +1416,14 @@ fec_enet_tx_queue(struct net_device *ndev,
> > u16 queue_id)
> > >                     if (!skb)
> > >                             goto tx_buf_done;
> > >             } else {
> > > +                   /* Tx processing cannot call any XDP (or page pool) APIs if
> > > +                    * the "budget" is 0. Because NAPI is called with budget of
> > > +                    * 0 (such as netpoll) indicates we may be in an IRQ context,
> > > +                    * however, we can't use the page pool from IRQ context.
> > > +                    */
> > > +                   if (unlikely(!budget))
> > > +                           break;
> > > +
> > >                     xdpf = txq->tx_buf[index].xdp;
> > >                     if (bdp->cbd_bufaddr)
> > >                             dma_unmap_single(&fep->pdev->dev,
> >
> > This statement isn't correct. There are napi enabled and non-napi
> > versions of these calls. This is the reason for things like the
> > "allow_direct" parameter in page_pool_put_full_page and the
> > "napi_direct" parameter in __xdp_return.
> >
> > By blocking on these cases you can end up hanging the Tx queue which is
> > going to break netpoll as you are going to stall the ring on XDP
> > packets if they are already in the queue.
> >
> > From what I can tell your driver is using xdp_return_frame in the case
> > of an XDP frame which doesn't make use of the NAPI optimizations in
> > freeing from what I can tell. The NAPI optimized version is
> > xdp_return_frame_rx.
> >
> So you mean it is safe to use xdp_return_frame no matter in NAPI context
> or non-NAPI context? And xdp_return_frame_rx_napi must be used in NAPI
> context? If so, I think I must have misunderstood, then this patch is not necessary.

Actually after talking with Jakub a bit more there is an issue here,
but not freeing the frames isn't the solution. We likely need to just
fix the page pool code so that it doesn't attempt to recycle the
frames if operating in IRQ context.

The way this is dealt with for skbs is that we queue skbs if we are in
IRQ context so that it can be deferred to be freed by the
net_tx_action. We likely need to look at doing something similar for
page_pool pages or XDP frames.

> > > @@ -1508,14 +1516,14 @@ fec_enet_tx_queue(struct net_device *ndev,
> > u16 queue_id)
> > >             writel(0, txq->bd.reg_desc_active);
> > >  }
> > >
> > > -static void fec_enet_tx(struct net_device *ndev)
> > > +static void fec_enet_tx(struct net_device *ndev, int budget)
> > >  {
> > >     struct fec_enet_private *fep = netdev_priv(ndev);
> > >     int i;
> > >
> > >     /* Make sure that AVB queues are processed first. */
> > >     for (i = fep->num_tx_queues - 1; i >= 0; i--)
> > > -           fec_enet_tx_queue(ndev, i);
> > > +           fec_enet_tx_queue(ndev, i, budget);
> > >  }
> > >
> > >  static void fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq,
> > > @@ -1858,7 +1866,7 @@ static int fec_enet_rx_napi(struct napi_struct
> > *napi, int budget)
> > >
> > >     do {
> > >             done += fec_enet_rx(ndev, budget - done);
> > > -           fec_enet_tx(ndev);
> > > +           fec_enet_tx(ndev, budget);
> > >     } while ((done < budget) && fec_enet_collect_events(fep));
> > >
> > >     if (done < budget) {
> >
> > Since you are passing budget, one optimization you could make use of
> > would be napi_consume_skb in your Tx path instead of dev_kfree_skb_any.
> That's good suggestion, I think I can add this optimization in my XDP_TX support
> patch. Thanks!
  
Wei Fang July 27, 2023, 2:08 a.m. UTC | #4
> > > This statement isn't correct. There are napi enabled and non-napi
> > > versions of these calls. This is the reason for things like the
> > > "allow_direct" parameter in page_pool_put_full_page and the
> > > "napi_direct" parameter in __xdp_return.
> > >
> > > By blocking on these cases you can end up hanging the Tx queue which is
> > > going to break netpoll as you are going to stall the ring on XDP
> > > packets if they are already in the queue.
> > >
> > > From what I can tell your driver is using xdp_return_frame in the case
> > > of an XDP frame which doesn't make use of the NAPI optimizations in
> > > freeing from what I can tell. The NAPI optimized version is
> > > xdp_return_frame_rx.
> > >
> > So you mean it is safe to use xdp_return_frame no matter in NAPI context
> > or non-NAPI context? And xdp_return_frame_rx_napi must be used in NAPI
> > context? If so, I think I must have misunderstood, then this patch is not
> necessary.
> 
> Actually after talking with Jakub a bit more there is an issue here,
> but not freeing the frames isn't the solution. We likely need to just
> fix the page pool code so that it doesn't attempt to recycle the
> frames if operating in IRQ context.
> 
> The way this is dealt with for skbs is that we queue skbs if we are in
> IRQ context so that it can be deferred to be freed by the
> net_tx_action. We likely need to look at doing something similar for
> page_pool pages or XDP frames.
> 
After reading your discussion with Jakub, I understand this issue a bit more.
But we are not sure when this issue will be fixed in page pool, so for now we
can only tolerate a delay in sending a netpoll message. So I think this patch
is necessary, and I will refine it in the future when the page pool has fixed the
issue. In addition, as you mentioned before, napi_consume_skb should be
used instead of dev_kfree_skb_any, so I will improve this patch in version 2.
Thanks.
  
Jakub Kicinski July 27, 2023, 4:14 a.m. UTC | #5
On Thu, 27 Jul 2023 02:08:32 +0000 Wei Fang wrote:
> > Actually after talking with Jakub a bit more there is an issue here,
> > but not freeing the frames isn't the solution. We likely need to just
> > fix the page pool code so that it doesn't attempt to recycle the
> > frames if operating in IRQ context.
> > 
> > The way this is dealt with for skbs is that we queue skbs if we are in
> > IRQ context so that it can be deferred to be freed by the
> > net_tx_action. We likely need to look at doing something similar for
> > page_pool pages or XDP frames.
> >   
> After reading your discussion with Jakub, I understand this issue a bit more.
> But we are not sure when this issue will be fixed in page pool, currently we
> can only tolerate a delay in sending of a netpoll message. So I think this patch
> is necessary, and I will refine it in the future when the page pool has fixed the
> issue. In addition, as you mentioned before, napi_consume_skb should be
> used to instead of dev_kfree_skb_any, so I will improve this patch in version 2.

I think so too, since the patch can only help, you already wrote it and
it won't be extra backporting work since the code is only present in
6.5 - I think it's worth applying. And we can refine things as page pool
limitations get listed (the napi_consume_skb() is net-next material,
anyway).
  
patchwork-bot+netdevbpf@kernel.org July 27, 2023, 4:20 a.m. UTC | #6
Hello:

This patch was applied to netdev/net.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Tue, 25 Jul 2023 15:41:48 +0800 you wrote:
> According to the clarification [1] in the latest napi.rst, the tx
> processing cannot call any XDP (or page pool) APIs if the "budget"
> is 0. Because NAPI is called with the budget of 0 (such as netpoll)
> indicates we may be in an IRQ context, however, we cannot use the
> page pool from IRQ context.
> 
> [1] https://lore.kernel.org/all/20230720161323.2025379-1-kuba@kernel.org/
> 
> [...]

Here is the summary with links:
  - [net] net: fec: tx processing does not call XDP APIs if budget is 0
    https://git.kernel.org/netdev/net/c/15cec633fc7b

You are awesome, thank you!
  
Wei Fang July 27, 2023, 5:32 a.m. UTC | #7
> On Thu, 27 Jul 2023 02:08:32 +0000 Wei Fang wrote:
> > > Actually after talking with Jakub a bit more there is an issue here,
> > > but not freeing the frames isn't the solution. We likely need to
> > > just fix the page pool code so that it doesn't attempt to recycle
> > > the frames if operating in IRQ context.
> > >
> > > The way this is dealt with for skbs is that we queue skbs if we are
> > > in IRQ context so that it can be deferred to be freed by the
> > > net_tx_action. We likely need to look at doing something similar for
> > > page_pool pages or XDP frames.
> > >
> > After reading your discussion with Jakub, I understand this issue a bit more.
> > But we are not sure when this issue will be fixed in page pool,
> > currently we can only tolerate a delay in sending of a netpoll
> > message. So I think this patch is necessary, and I will refine it in
> > the future when the page pool has fixed the issue. In addition, as you
> > mentioned before, napi_consume_skb should be used to instead of
> dev_kfree_skb_any, so I will improve this patch in version 2.
> 
> I think so too, since the patch can only help, you already wrote it and it won't
> be extra backporting work since the code is only present in
> 6.5 - I think it's worth applying. And we can refine things as page pool
> limitations get listed (the napi_consume_skb() is net-next material, anyway).
Okay, thank you. :)
  

Patch

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 073d61619336..66b5cbdb43b9 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1372,7 +1372,7 @@  fec_enet_hwtstamp(struct fec_enet_private *fep, unsigned ts,
 }
 
 static void
-fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
+fec_enet_tx_queue(struct net_device *ndev, u16 queue_id, int budget)
 {
 	struct	fec_enet_private *fep;
 	struct xdp_frame *xdpf;
@@ -1416,6 +1416,14 @@  fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
 			if (!skb)
 				goto tx_buf_done;
 		} else {
+			/* Tx processing cannot call any XDP (or page pool) APIs if
+			 * the "budget" is 0. Because NAPI is called with budget of
+			 * 0 (such as netpoll) indicates we may be in an IRQ context,
+			 * however, we can't use the page pool from IRQ context.
+			 */
+			if (unlikely(!budget))
+				break;
+
 			xdpf = txq->tx_buf[index].xdp;
 			if (bdp->cbd_bufaddr)
 				dma_unmap_single(&fep->pdev->dev,
@@ -1508,14 +1516,14 @@  fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
 		writel(0, txq->bd.reg_desc_active);
 }
 
-static void fec_enet_tx(struct net_device *ndev)
+static void fec_enet_tx(struct net_device *ndev, int budget)
 {
 	struct fec_enet_private *fep = netdev_priv(ndev);
 	int i;
 
 	/* Make sure that AVB queues are processed first. */
 	for (i = fep->num_tx_queues - 1; i >= 0; i--)
-		fec_enet_tx_queue(ndev, i);
+		fec_enet_tx_queue(ndev, i, budget);
 }
 
 static void fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq,
@@ -1858,7 +1866,7 @@  static int fec_enet_rx_napi(struct napi_struct *napi, int budget)
 
 	do {
 		done += fec_enet_rx(ndev, budget - done);
-		fec_enet_tx(ndev);
+		fec_enet_tx(ndev, budget);
 	} while ((done < budget) && fec_enet_collect_events(fep));
 
 	if (done < budget) {