diff mbox series

[net-next] net: fec: add XDP_TX feature support

Message ID	20230717103709.2629372-1-wei.fang@nxp.com
State	New
Headers	Received-SPF: pass (google.com: domain of linux-kernel-owner@vger.kernel.org designates 2620:137:e000::1:20 as permitted sender) client-ip=2620:137:e000::1:20; From: Wei Fang <wei.fang@nxp.com> To: davem@davemloft.net, edumazet@google.com, kuba@kernel.org, pabeni@redhat.com, ast@kernel.org, daniel@iogearbox.net, hawk@kernel.org, john.fastabend@gmail.com, xiaoning.wang@nxp.com, shenwei.wang@nxp.com, netdev@vger.kernel.org Cc: linux-imx@nxp.com, linux-kernel@vger.kernel.org, bpf@vger.kernel.org Subject: [PATCH net-next] net: fec: add XDP_TX feature support Date: Mon, 17 Jul 2023 18:37:09 +0800 Message-Id: <20230717103709.2629372-1-wei.fang@nxp.com> Content-Transfer-Encoding: 8bit Content-Type: text/plain MIME-Version: 1.0 Precedence: bulk
Series	[net-next] net: fec: add XDP_TX feature support \| [net-next] net: fec: add XDP_TX feature support

Commit Message

Wei Fang July 17, 2023, 10:37 a.m. UTC

  The XDP_TX feature was not supported before, and all the frames
that were supposed to take the XDP_TX action were actually handled
as XDP_DROP. So this patch adds XDP_TX support to the FEC driver.

I tested the performance of XDP_TX feature in XDP_DRV and XDP_SKB
modes on i.MX8MM-EVK and i.MX8MP-EVK platforms respectively, and
the test steps and results are as follows.

Step 1: Board A connects to the FEC port of the DUT and runs the
pktgen_sample03_burst_single_flow.sh script to generate and send
burst traffic to the DUT. Note that the packet length was set to
64 bytes and the protocol of the packets was UDP in my test scenario.

Step 2: The DUT runs the xdp2 program to transmit received UDP
packets back out on the same port where they were received.

root@imx8mmevk:~# ./xdp2 eth0
proto 17:     150326 pkt/s
proto 17:     141920 pkt/s
proto 17:     147338 pkt/s
proto 17:     140783 pkt/s
proto 17:     150400 pkt/s
proto 17:     134651 pkt/s
proto 17:     134676 pkt/s
proto 17:     134959 pkt/s
proto 17:     148152 pkt/s
proto 17:     149885 pkt/s

root@imx8mmevk:~# ./xdp2 -S eth0
proto 17:     131094 pkt/s
proto 17:     134691 pkt/s
proto 17:     138930 pkt/s
proto 17:     129347 pkt/s
proto 17:     133050 pkt/s
proto 17:     132932 pkt/s
proto 17:     136628 pkt/s
proto 17:     132964 pkt/s
proto 17:     131265 pkt/s
proto 17:     135794 pkt/s

root@imx8mpevk:~# ./xdp2 eth0
proto 17:     135817 pkt/s
proto 17:     142776 pkt/s
proto 17:     142237 pkt/s
proto 17:     135673 pkt/s
proto 17:     139508 pkt/s
proto 17:     147340 pkt/s
proto 17:     133329 pkt/s
proto 17:     141171 pkt/s
proto 17:     146917 pkt/s
proto 17:     135488 pkt/s

root@imx8mpevk:~# ./xdp2 -S eth0
proto 17:     133150 pkt/s
proto 17:     133127 pkt/s
proto 17:     133538 pkt/s
proto 17:     133094 pkt/s
proto 17:     133690 pkt/s
proto 17:     133199 pkt/s
proto 17:     133905 pkt/s
proto 17:     132908 pkt/s
proto 17:     133292 pkt/s
proto 17:     133511 pkt/s

Signed-off-by: Wei Fang <wei.fang@nxp.com>
---
 drivers/net/ethernet/freescale/fec.h      |  1 +
 drivers/net/ethernet/freescale/fec_main.c | 81 ++++++++++++++++++-----
 2 files changed, 66 insertions(+), 16 deletions(-)

Comments

Alexander Lobakin July 18, 2023, 3:14 p.m. UTC | #1

From: Wei Fang <wei.fang@nxp.com>
Date: Mon, 17 Jul 2023 18:37:09 +0800

> The XDP_TX feature is not supported before, and all the frames
> which are deemed to do XDP_TX action actually do the XDP_DROP
> action. So this patch adds the XDP_TX support to FEC driver.

[...]

> @@ -3897,6 +3923,29 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
>  	return 0;
>  }
>  
> +static int fec_enet_xdp_tx_xmit(struct net_device *ndev,
> +				struct xdp_buff *xdp)
> +{
> +	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);

Have you tried to avoid converting the buff to a frame in the XDP_TX
case? It would save you a bunch of CPU cycles.

> +	struct fec_enet_private *fep = netdev_priv(ndev);
> +	struct fec_enet_priv_tx_q *txq;
> +	int cpu = smp_processor_id();
> +	struct netdev_queue *nq;
> +	int queue, ret;
> +
> +	queue = fec_enet_xdp_get_tx_queue(fep, cpu);
> +	txq = fep->tx_queue[queue];
> +	nq = netdev_get_tx_queue(fep->netdev, queue);
> +
> +	__netif_tx_lock(nq, cpu);
> +
> +	ret = fec_enet_txq_xmit_frame(fep, txq, xdpf, false);
> +
> +	__netif_tx_unlock(nq);
> +
> +	return ret;
> +}
> +
>  static int fec_enet_xdp_xmit(struct net_device *dev,
>  			     int num_frames,
>  			     struct xdp_frame **frames,
> @@ -3917,7 +3966,7 @@ static int fec_enet_xdp_xmit(struct net_device *dev,
>  	__netif_tx_lock(nq, cpu);
>  
>  	for (i = 0; i < num_frames; i++) {
> -		if (fec_enet_txq_xmit_frame(fep, txq, frames[i]) < 0)
> +		if (fec_enet_txq_xmit_frame(fep, txq, frames[i], true) < 0)
>  			break;
>  		sent_frames++;
>  	}

Thanks,
Olek

Wei Fang July 19, 2023, 3:28 a.m. UTC | #2

> -----Original Message-----
> From: Alexander Lobakin <aleksander.lobakin@intel.com>
> Sent: 2023年7月18日 23:15
> To: Wei Fang <wei.fang@nxp.com>
> Cc: davem@davemloft.net; edumazet@google.com; kuba@kernel.org;
> pabeni@redhat.com; ast@kernel.org; daniel@iogearbox.net;
> hawk@kernel.org; john.fastabend@gmail.com; Clark Wang
> <xiaoning.wang@nxp.com>; Shenwei Wang <shenwei.wang@nxp.com>;
> netdev@vger.kernel.org; dl-linux-imx <linux-imx@nxp.com>;
> linux-kernel@vger.kernel.org; bpf@vger.kernel.org
> Subject: Re: [PATCH net-next] net: fec: add XDP_TX feature support
> 
> From: Wei Fang <wei.fang@nxp.com>
> Date: Mon, 17 Jul 2023 18:37:09 +0800
> 
> > The XDP_TX feature is not supported before, and all the frames which
> > are deemed to do XDP_TX action actually do the XDP_DROP action. So
> > this patch adds the XDP_TX support to FEC driver.
> 
> [...]
> 
> > @@ -3897,6 +3923,29 @@ static int fec_enet_txq_xmit_frame(struct
> fec_enet_private *fep,
> >  	return 0;
> >  }
> >
> > +static int fec_enet_xdp_tx_xmit(struct net_device *ndev,
> > +				struct xdp_buff *xdp)
> > +{
> > +	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
> 
> Have you tried avoid converting buff to frame in case of XDP_TX? It would save
> you a bunch of CPU cycles.
> 
Sorry, I haven't. I looked at how several Ethernet drivers implement
XDP_TX. Most of them convert the xdp_buff to an xdp_frame, and with this
approach I can reuse the existing fec_enet_txq_xmit_frame() interface to
transmit the frames, so the implementation is relatively simple. Otherwise,
more changes and more effort would be needed to implement this feature.
Thanks!

> > +	struct fec_enet_private *fep = netdev_priv(ndev);
> > +	struct fec_enet_priv_tx_q *txq;
> > +	int cpu = smp_processor_id();
> > +	struct netdev_queue *nq;
> > +	int queue, ret;
> > +
> > +	queue = fec_enet_xdp_get_tx_queue(fep, cpu);
> > +	txq = fep->tx_queue[queue];
> > +	nq = netdev_get_tx_queue(fep->netdev, queue);
> > +
> > +	__netif_tx_lock(nq, cpu);
> > +
> > +	ret = fec_enet_txq_xmit_frame(fep, txq, xdpf, false);
> > +
> > +	__netif_tx_unlock(nq);
> > +
> > +	return ret;
> > +}
> > +
> >  static int fec_enet_xdp_xmit(struct net_device *dev,
> >  			     int num_frames,
> >  			     struct xdp_frame **frames,
> > @@ -3917,7 +3966,7 @@ static int fec_enet_xdp_xmit(struct net_device
> *dev,
> >  	__netif_tx_lock(nq, cpu);
> >
> >  	for (i = 0; i < num_frames; i++) {
> > -		if (fec_enet_txq_xmit_frame(fep, txq, frames[i]) < 0)
> > +		if (fec_enet_txq_xmit_frame(fep, txq, frames[i], true) < 0)
> >  			break;
> >  		sent_frames++;
> >  	}
>

Alexander Lobakin July 19, 2023, 4:46 p.m. UTC | #3

From: Wei Fang <wei.fang@nxp.com>
Date: Wed, 19 Jul 2023 03:28:26 +0000

>> -----Original Message-----
>> From: Alexander Lobakin <aleksander.lobakin@intel.com>
>> Sent: 2023年7月18日 23:15
>> To: Wei Fang <wei.fang@nxp.com>
>> Cc: davem@davemloft.net; edumazet@google.com; kuba@kernel.org;
>> pabeni@redhat.com; ast@kernel.org; daniel@iogearbox.net;
>> hawk@kernel.org; john.fastabend@gmail.com; Clark Wang
>> <xiaoning.wang@nxp.com>; Shenwei Wang <shenwei.wang@nxp.com>;
>> netdev@vger.kernel.org; dl-linux-imx <linux-imx@nxp.com>;
>> linux-kernel@vger.kernel.org; bpf@vger.kernel.org
>> Subject: Re: [PATCH net-next] net: fec: add XDP_TX feature support
>>
>> From: Wei Fang <wei.fang@nxp.com>
>> Date: Mon, 17 Jul 2023 18:37:09 +0800
>>
>>> The XDP_TX feature is not supported before, and all the frames which
>>> are deemed to do XDP_TX action actually do the XDP_DROP action. So
>>> this patch adds the XDP_TX support to FEC driver.
>>
>> [...]
>>
>>> @@ -3897,6 +3923,29 @@ static int fec_enet_txq_xmit_frame(struct
>> fec_enet_private *fep,
>>>  	return 0;
>>>  }
>>>
>>> +static int fec_enet_xdp_tx_xmit(struct net_device *ndev,
>>> +				struct xdp_buff *xdp)
>>> +{
>>> +	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
>>
>> Have you tried avoid converting buff to frame in case of XDP_TX? It would save
>> you a bunch of CPU cycles.
>>
> Sorry, I haven't. I referred to several ethernet drivers about the implementation of
> XDP_TX. Most drivers adopt the method of converting xdp_buff to xdp_frame, and
> in this method, I can reuse the existing interface fec_enet_txq_xmit_frame() to
> transmit the frames and the implementation is relatively simple. Otherwise, there
> will be more changes and more effort is needed to implement this feature.
> Thanks!

No problem, it is just FYI, as we observe worse performance when
xdp_convert_buff_to_frame() is used for XDP_TX than when the xdp_buff
is transmitted directly. The main reason is that converting to an XDP
frame touches the ->data_hard_start cacheline (usually untouched), while
the xdp_buff is always on the stack and hot.
It is up to you what to pick for your driver obviously :)

> 
>>> +	struct fec_enet_private *fep = netdev_priv(ndev);
>>> +	struct fec_enet_priv_tx_q *txq;
>>> +	int cpu = smp_processor_id();
>>> +	struct netdev_queue *nq;
>>> +	int queue, ret;
>>> +
>>> +	queue = fec_enet_xdp_get_tx_queue(fep, cpu);
>>> +	txq = fep->tx_queue[queue];
>>> +	nq = netdev_get_tx_queue(fep->netdev, queue);
>>> +
>>> +	__netif_tx_lock(nq, cpu);
>>> +
>>> +	ret = fec_enet_txq_xmit_frame(fep, txq, xdpf, false);
>>> +
>>> +	__netif_tx_unlock(nq);
>>> +
>>> +	return ret;
>>> +}
>>> +
>>>  static int fec_enet_xdp_xmit(struct net_device *dev,
>>>  			     int num_frames,
>>>  			     struct xdp_frame **frames,
>>> @@ -3917,7 +3966,7 @@ static int fec_enet_xdp_xmit(struct net_device
>> *dev,
>>>  	__netif_tx_lock(nq, cpu);
>>>
>>>  	for (i = 0; i < num_frames; i++) {
>>> -		if (fec_enet_txq_xmit_frame(fep, txq, frames[i]) < 0)
>>> +		if (fec_enet_txq_xmit_frame(fep, txq, frames[i], true) < 0)
>>>  			break;
>>>  		sent_frames++;
>>>  	}
>>
> 

Thanks,
Olek

Wei Fang July 20, 2023, 2:44 a.m. UTC | #4

> -----Original Message-----
> From: Alexander Lobakin <aleksander.lobakin@intel.com>
> Sent: 2023年7月20日 0:46
> To: Wei Fang <wei.fang@nxp.com>
> Cc: davem@davemloft.net; edumazet@google.com; kuba@kernel.org;
> pabeni@redhat.com; ast@kernel.org; daniel@iogearbox.net;
> hawk@kernel.org; john.fastabend@gmail.com; Clark Wang
> <xiaoning.wang@nxp.com>; Shenwei Wang <shenwei.wang@nxp.com>;
> netdev@vger.kernel.org; dl-linux-imx <linux-imx@nxp.com>;
> linux-kernel@vger.kernel.org; bpf@vger.kernel.org
> Subject: Re: [PATCH net-next] net: fec: add XDP_TX feature support
> 
> From: Wei Fang <wei.fang@nxp.com>
> Date: Wed, 19 Jul 2023 03:28:26 +0000
> 
> >> -----Original Message-----
> >> From: Alexander Lobakin <aleksander.lobakin@intel.com>
> >> Sent: 2023年7月18日 23:15
> >> To: Wei Fang <wei.fang@nxp.com>
> >> Cc: davem@davemloft.net; edumazet@google.com; kuba@kernel.org;
> >> pabeni@redhat.com; ast@kernel.org; daniel@iogearbox.net;
> >> hawk@kernel.org; john.fastabend@gmail.com; Clark Wang
> >> <xiaoning.wang@nxp.com>; Shenwei Wang <shenwei.wang@nxp.com>;
> >> netdev@vger.kernel.org; dl-linux-imx <linux-imx@nxp.com>;
> >> linux-kernel@vger.kernel.org; bpf@vger.kernel.org
> >> Subject: Re: [PATCH net-next] net: fec: add XDP_TX feature support
> >>
> >> From: Wei Fang <wei.fang@nxp.com>
> >> Date: Mon, 17 Jul 2023 18:37:09 +0800
> >>
> >>> The XDP_TX feature is not supported before, and all the frames which
> >>> are deemed to do XDP_TX action actually do the XDP_DROP action. So
> >>> this patch adds the XDP_TX support to FEC driver.
> >>
> >> [...]
> >>
> >>> @@ -3897,6 +3923,29 @@ static int fec_enet_txq_xmit_frame(struct
> >> fec_enet_private *fep,
> >>>  	return 0;
> >>>  }
> >>>
> >>> +static int fec_enet_xdp_tx_xmit(struct net_device *ndev,
> >>> +				struct xdp_buff *xdp)
> >>> +{
> >>> +	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
> >>
> >> Have you tried avoid converting buff to frame in case of XDP_TX? It would
> save
> >> you a bunch of CPU cycles.
> >>
> > Sorry, I haven't. I referred to several ethernet drivers about the
> implementation of
> > XDP_TX. Most drivers adopt the method of converting xdp_buff to xdp_frame,
> and
> > in this method, I can reuse the existing interface fec_enet_txq_xmit_frame()
> to
> > transmit the frames and the implementation is relatively simple. Otherwise,
> there
> > will be more changes and more effort is needed to implement this feature.
> > Thanks!
> 
> No problem, it is just FYI, as we observe worse performance when
> convert_buff_to_frame() is used for XDP_TX versus when you transmit the
> xdp_buff directly. The main reason is that converting to XDP frame
> touches ->data_hard_start cacheline (usually untouched), while xdp_buff
> is always on the stack and hot.
> It is up to you what to pick for your driver obviously :)
> 
Thanks for the information. For now, the current XDP_TX performance meets
our expectations. I'll keep your suggestion in mind and try it if we have
higher performance requirements. :D

> >
> >>> +	struct fec_enet_private *fep = netdev_priv(ndev);
> >>> +	struct fec_enet_priv_tx_q *txq;
> >>> +	int cpu = smp_processor_id();
> >>> +	struct netdev_queue *nq;
> >>> +	int queue, ret;
> >>> +
> >>> +	queue = fec_enet_xdp_get_tx_queue(fep, cpu);
> >>> +	txq = fep->tx_queue[queue];
> >>> +	nq = netdev_get_tx_queue(fep->netdev, queue);
> >>> +
> >>> +	__netif_tx_lock(nq, cpu);
> >>> +
> >>> +	ret = fec_enet_txq_xmit_frame(fep, txq, xdpf, false);
> >>> +
> >>> +	__netif_tx_unlock(nq);
> >>> +
> >>> +	return ret;
> >>> +}
> >>> +
> >>>  static int fec_enet_xdp_xmit(struct net_device *dev,
> >>>  			     int num_frames,
> >>>  			     struct xdp_frame **frames,
> >>> @@ -3917,7 +3966,7 @@ static int fec_enet_xdp_xmit(struct net_device
> >> *dev,
> >>>  	__netif_tx_lock(nq, cpu);
> >>>
> >>>  	for (i = 0; i < num_frames; i++) {
> >>> -		if (fec_enet_txq_xmit_frame(fep, txq, frames[i]) < 0)
> >>> +		if (fec_enet_txq_xmit_frame(fep, txq, frames[i], true) < 0)
> >>>  			break;
> >>>  		sent_frames++;
> >>>  	}
> >>
> >
> 
> Thanks,
> Olek

Jakub Kicinski July 20, 2023, 3:45 a.m. UTC | #5

On Mon, 17 Jul 2023 18:37:09 +0800 Wei Fang wrote:
> -			xdp_return_frame(xdpf);
> +			if (txq->tx_buf[index].type == FEC_TXBUF_T_XDP_NDO)
> +				xdp_return_frame(xdpf);
> +			else
> +				xdp_return_frame_rx_napi(xdpf);

Are you taking budget into account? When NAPI is called with budget 
of 0 we are *not* in napi / softirq context. You can't be processing
any XDP tx under such conditions (it may be a netpoll call from IRQ
context).

> +static int fec_enet_xdp_tx_xmit(struct net_device *ndev,
> +				struct xdp_buff *xdp)
> +{
> +	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
> +	struct fec_enet_private *fep = netdev_priv(ndev);
> +	struct fec_enet_priv_tx_q *txq;
> +	int cpu = smp_processor_id();
> +	struct netdev_queue *nq;
> +	int queue, ret;
> +
> +	queue = fec_enet_xdp_get_tx_queue(fep, cpu);
> +	txq = fep->tx_queue[queue];
> +	nq = netdev_get_tx_queue(fep->netdev, queue);
> +
> +	__netif_tx_lock(nq, cpu);
> +
> +	ret = fec_enet_txq_xmit_frame(fep, txq, xdpf, false);
> +
> +	__netif_tx_unlock(nq);

If you're reusing the same queues as the stack you need to call
txq_trans_cond_update() at some point, otherwise the stack may
print a splat complaining the queue got stuck.

Wei Fang July 20, 2023, 7:06 a.m. UTC | #6

> -----Original Message-----
> From: Jakub Kicinski <kuba@kernel.org>
> Sent: 2023年7月20日 11:46
> To: Wei Fang <wei.fang@nxp.com>
> Cc: davem@davemloft.net; edumazet@google.com; pabeni@redhat.com;
> ast@kernel.org; daniel@iogearbox.net; hawk@kernel.org;
> john.fastabend@gmail.com; Clark Wang <xiaoning.wang@nxp.com>; Shenwei
> Wang <shenwei.wang@nxp.com>; netdev@vger.kernel.org; dl-linux-imx
> <linux-imx@nxp.com>; linux-kernel@vger.kernel.org; bpf@vger.kernel.org
> Subject: Re: [PATCH net-next] net: fec: add XDP_TX feature support
> 
> On Mon, 17 Jul 2023 18:37:09 +0800 Wei Fang wrote:
> > -			xdp_return_frame(xdpf);
> > +			if (txq->tx_buf[index].type == FEC_TXBUF_T_XDP_NDO)
> > +				xdp_return_frame(xdpf);
> > +			else
> > +				xdp_return_frame_rx_napi(xdpf);
> 
> Are you taking budget into account? When NAPI is called with budget of 0 we
> are *not* in napi / softirq context. You can't be processing any XDP tx under
> such conditions (it may be a netpoll call from IRQ context).
Actually, the fec driver never takes the budget into account when cleaning up
the tx BD ring. The budget is only used for rx.

> 
> > +static int fec_enet_xdp_tx_xmit(struct net_device *ndev,
> > +				struct xdp_buff *xdp)
> > +{
> > +	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
> > +	struct fec_enet_private *fep = netdev_priv(ndev);
> > +	struct fec_enet_priv_tx_q *txq;
> > +	int cpu = smp_processor_id();
> > +	struct netdev_queue *nq;
> > +	int queue, ret;
> > +
> > +	queue = fec_enet_xdp_get_tx_queue(fep, cpu);
> > +	txq = fep->tx_queue[queue];
> > +	nq = netdev_get_tx_queue(fep->netdev, queue);
> > +
> > +	__netif_tx_lock(nq, cpu);
> > +
> > +	ret = fec_enet_txq_xmit_frame(fep, txq, xdpf, false);
> > +
> > +	__netif_tx_unlock(nq);
> 
> If you're reusing the same queues as the stack you need to call
> txq_trans_cond_update() at some point, otherwise the stack may print a splat
> complaining the queue got stuck.
Yes, you are absolutely right. I'll add txq_trans_cond_update() in the next
version. Thanks!

Jakub Kicinski July 20, 2023, 3:24 p.m. UTC | #7

On Thu, 20 Jul 2023 07:06:05 +0000 Wei Fang wrote:
> > Are you taking budget into account? When NAPI is called with budget of 0 we
> > are *not* in napi / softirq context. You can't be processing any XDP tx under
> > such conditions (it may be a netpoll call from IRQ context).  
> 
> Actually, the fec driver never takes the budget into account for cleaning up tx BD
> ring. The budget is only valid for rx.

I know, that's what I'm complaining about. XDP can only run in normal
NAPI context, i.e. when NAPI is called with budget != 0. That works out
without any changes on Rx, if budget is zero drivers already don't
process Rx. But similar change must be done on Tx when adding XDP
support. You can still process all normal skb packets on Tx when budget
is 0 (in fact you should), but you _can't_ process any XDP Tx frame.

Wei Fang July 21, 2023, 2:29 a.m. UTC | #8

> -----Original Message-----
> From: Jakub Kicinski <kuba@kernel.org>
> Sent: 2023年7月20日 23:25
> To: Wei Fang <wei.fang@nxp.com>
> Cc: davem@davemloft.net; edumazet@google.com; pabeni@redhat.com;
> ast@kernel.org; daniel@iogearbox.net; hawk@kernel.org;
> john.fastabend@gmail.com; Clark Wang <xiaoning.wang@nxp.com>; Shenwei
> Wang <shenwei.wang@nxp.com>; netdev@vger.kernel.org; dl-linux-imx
> <linux-imx@nxp.com>; linux-kernel@vger.kernel.org; bpf@vger.kernel.org
> Subject: Re: [PATCH net-next] net: fec: add XDP_TX feature support
> 
> On Thu, 20 Jul 2023 07:06:05 +0000 Wei Fang wrote:
> > > Are you taking budget into account? When NAPI is called with budget
> > > of 0 we are *not* in napi / softirq context. You can't be processing
> > > any XDP tx under such conditions (it may be a netpoll call from IRQ
> context).
> >
> > Actually, the fec driver never takes the budget into account for
> > cleaning up tx BD ring. The budget is only valid for rx.
> 
> I know, that's what I'm complaining about. XDP can only run in normal NAPI
> context, i.e. when NAPI is called with budget != 0. That works out without any
> changes on Rx, if budget is zero drivers already don't process Rx. But similar
> change must be done on Tx when adding XDP support. You can still process all
> normal skb packets on Tx when budget is 0 (in fact you should), but you
> _can't_ process any XDP Tx frame.
Sorry, I did not realize that we cannot process any XDP tx packets when the
"budget" is 0. I noticed your latest clarification [1] in napi.rst; I believe it
will help many people like me avoid this problem. Thank you very much.
[1]: https://lore.kernel.org/netdev/20230720161323.2025379-1-kuba@kernel.org/T/

diff mbox series

Patch

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 63a053dea819..e4b5ae4884d9 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -547,6 +547,7 @@  enum {
 enum fec_txbuf_type {
 	FEC_TXBUF_T_SKB,
 	FEC_TXBUF_T_XDP_NDO,
+	FEC_TXBUF_T_XDP_TX,
 };
 
 struct fec_tx_buffer {
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 1b990a486059..1063552980bc 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -75,6 +75,8 @@ 
 
 static void set_multicast_list(struct net_device *ndev);
 static void fec_enet_itr_coal_set(struct net_device *ndev);
+static int fec_enet_xdp_tx_xmit(struct net_device *ndev,
+				struct xdp_buff *xdp);
 
 #define DRIVER_NAME	"fec"
 
@@ -962,7 +964,8 @@  static void fec_enet_bd_init(struct net_device *dev)
 					txq->tx_buf[i].skb = NULL;
 				}
 			} else {
-				if (bdp->cbd_bufaddr)
+				if (bdp->cbd_bufaddr &&
+				    txq->tx_buf[i].type == FEC_TXBUF_T_XDP_NDO)
 					dma_unmap_single(&fep->pdev->dev,
 							 fec32_to_cpu(bdp->cbd_bufaddr),
 							 fec16_to_cpu(bdp->cbd_datlen),
@@ -1417,7 +1420,8 @@  fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
 				goto tx_buf_done;
 		} else {
 			xdpf = txq->tx_buf[index].xdp;
-			if (bdp->cbd_bufaddr)
+			if (bdp->cbd_bufaddr &&
+			    txq->tx_buf[index].type == FEC_TXBUF_T_XDP_NDO)
 				dma_unmap_single(&fep->pdev->dev,
 						 fec32_to_cpu(bdp->cbd_bufaddr),
 						 fec16_to_cpu(bdp->cbd_datlen),
@@ -1476,7 +1480,10 @@  fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
 			/* Free the sk buffer associated with this last transmit */
 			dev_kfree_skb_any(skb);
 		} else {
-			xdp_return_frame(xdpf);
+			if (txq->tx_buf[index].type == FEC_TXBUF_T_XDP_NDO)
+				xdp_return_frame(xdpf);
+			else
+				xdp_return_frame_rx_napi(xdpf);
 
 			txq->tx_buf[index].xdp = NULL;
 			/* restore default tx buffer type: FEC_TXBUF_T_SKB */
@@ -1567,11 +1574,18 @@  fec_enet_run_xdp(struct fec_enet_private *fep, struct bpf_prog *prog,
 		}
 		break;
 
-	default:
-		bpf_warn_invalid_xdp_action(fep->netdev, prog, act);
-		fallthrough;
-
 	case XDP_TX:
+		err = fec_enet_xdp_tx_xmit(fep->netdev, xdp);
+		if (err) {
+			ret = FEC_ENET_XDP_CONSUMED;
+			page = virt_to_head_page(xdp->data);
+			page_pool_put_page(rxq->page_pool, page, sync, true);
+		} else {
+			ret = FEC_ENET_XDP_TX;
+		}
+		break;
+
+	default:
 		bpf_warn_invalid_xdp_action(fep->netdev, prog, act);
 		fallthrough;
 
@@ -3827,7 +3841,8 @@  fec_enet_xdp_get_tx_queue(struct fec_enet_private *fep, int index)
 
 static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
 				   struct fec_enet_priv_tx_q *txq,
-				   struct xdp_frame *frame)
+				   struct xdp_frame *frame,
+				   bool ndo_xmit)
 {
 	unsigned int index, status, estatus;
 	struct bufdesc *bdp;
@@ -3847,10 +3862,24 @@  static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
 
 	index = fec_enet_get_bd_index(bdp, &txq->bd);
 
-	dma_addr = dma_map_single(&fep->pdev->dev, frame->data,
-				  frame->len, DMA_TO_DEVICE);
-	if (dma_mapping_error(&fep->pdev->dev, dma_addr))
-		return -ENOMEM;
+	if (ndo_xmit) {
+		dma_addr = dma_map_single(&fep->pdev->dev, frame->data,
+					  frame->len, DMA_TO_DEVICE);
+		if (dma_mapping_error(&fep->pdev->dev, dma_addr))
+			return -ENOMEM;
+
+		txq->tx_buf[index].type = FEC_TXBUF_T_XDP_NDO;
+	} else {
+		struct page *page = virt_to_page(frame->data);
+
+		dma_addr = page_pool_get_dma_addr(page) + sizeof(*frame) +
+			   frame->headroom;
+		dma_sync_single_for_device(&fep->pdev->dev, dma_addr,
+					   frame->len, DMA_BIDIRECTIONAL);
+		txq->tx_buf[index].type = FEC_TXBUF_T_XDP_TX;
+	}
+
+	txq->tx_buf[index].xdp = frame;
 
 	status |= (BD_ENET_TX_INTR | BD_ENET_TX_LAST);
 	if (fep->bufdesc_ex)
@@ -3869,9 +3898,6 @@  static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
 		ebdp->cbd_esc = cpu_to_fec32(estatus);
 	}
 
-	txq->tx_buf[index].type = FEC_TXBUF_T_XDP_NDO;
-	txq->tx_buf[index].xdp = frame;
-
 	/* Make sure the updates to rest of the descriptor are performed before
 	 * transferring ownership.
 	 */
@@ -3897,6 +3923,29 @@  static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
 	return 0;
 }
 
+static int fec_enet_xdp_tx_xmit(struct net_device *ndev,
+				struct xdp_buff *xdp)
+{
+	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
+	struct fec_enet_private *fep = netdev_priv(ndev);
+	struct fec_enet_priv_tx_q *txq;
+	int cpu = smp_processor_id();
+	struct netdev_queue *nq;
+	int queue, ret;
+
+	queue = fec_enet_xdp_get_tx_queue(fep, cpu);
+	txq = fep->tx_queue[queue];
+	nq = netdev_get_tx_queue(fep->netdev, queue);
+
+	__netif_tx_lock(nq, cpu);
+
+	ret = fec_enet_txq_xmit_frame(fep, txq, xdpf, false);
+
+	__netif_tx_unlock(nq);
+
+	return ret;
+}
+
 static int fec_enet_xdp_xmit(struct net_device *dev,
 			     int num_frames,
 			     struct xdp_frame **frames,
@@ -3917,7 +3966,7 @@  static int fec_enet_xdp_xmit(struct net_device *dev,
 	__netif_tx_lock(nq, cpu);
 
 	for (i = 0; i < num_frames; i++) {
-		if (fec_enet_txq_xmit_frame(fep, txq, frames[i]) < 0)
+		if (fec_enet_txq_xmit_frame(fep, txq, frames[i], true) < 0)
 			break;
 		sent_frames++;
 	}