[net-next,v4,6/7] net: lan966x: Add support for XDP_TX

Message ID 20221122214413.3446006-7-horatiu.vultur@microchip.com
State New
Headers
Series net: lan966x: Extend xdp support |

Commit Message

Horatiu Vultur Nov. 22, 2022, 9:44 p.m. UTC
  Extend lan966x XDP support with the action XDP_TX. In this case when the
received buffer needs to execute XDP_TX, the buffer will be moved to the
TX buffers. So a new RX buffer will be allocated.
When the TX finishes with the frame, it gives the buffer back to the
page pool.

Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
---
 .../ethernet/microchip/lan966x/lan966x_fdma.c | 78 +++++++++++++++++--
 .../ethernet/microchip/lan966x/lan966x_main.c |  4 +-
 .../ethernet/microchip/lan966x/lan966x_main.h |  8 ++
 .../ethernet/microchip/lan966x/lan966x_xdp.c  |  8 ++
 4 files changed, 90 insertions(+), 8 deletions(-)
  

Comments

Maciej Fijalkowski Nov. 22, 2022, 10:27 p.m. UTC | #1
On Tue, Nov 22, 2022 at 10:44:12PM +0100, Horatiu Vultur wrote:
> Extend lan966x XDP support with the action XDP_TX. In this case when the
> received buffer needs to execute XDP_TX, the buffer will be moved to the
> TX buffers. So a new RX buffer will be allocated.
> When the TX finish with the frame, it would give back the buffer to the
> page pool.
> 
> Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
> ---
>  .../ethernet/microchip/lan966x/lan966x_fdma.c | 78 +++++++++++++++++--
>  .../ethernet/microchip/lan966x/lan966x_main.c |  4 +-
>  .../ethernet/microchip/lan966x/lan966x_main.h |  8 ++
>  .../ethernet/microchip/lan966x/lan966x_xdp.c  |  8 ++
>  4 files changed, 90 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
> index f8287a6a86ed5..23e1cad0f5d37 100644
> --- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
> +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
> @@ -411,12 +411,18 @@ static void lan966x_fdma_tx_clear_buf(struct lan966x *lan966x, int weight)
>  		dcb_buf->dev->stats.tx_bytes += dcb_buf->len;
>  
>  		dcb_buf->used = false;
> -		dma_unmap_single(lan966x->dev,
> -				 dcb_buf->dma_addr,
> -				 dcb_buf->len,
> -				 DMA_TO_DEVICE);
> -		if (!dcb_buf->ptp)
> -			dev_kfree_skb_any(dcb_buf->skb);
> +		if (dcb_buf->skb) {
> +			dma_unmap_single(lan966x->dev,
> +					 dcb_buf->dma_addr,
> +					 dcb_buf->len,
> +					 DMA_TO_DEVICE);
> +
> +			if (!dcb_buf->ptp)
> +				napi_consume_skb(dcb_buf->skb, weight);
> +		}
> +
> +		if (dcb_buf->xdpf)
> +			xdp_return_frame_rx_napi(dcb_buf->xdpf);
>  
>  		clear = true;
>  	}
> @@ -549,6 +555,9 @@ static int lan966x_fdma_napi_poll(struct napi_struct *napi, int weight)
>  			lan966x_fdma_rx_free_page(rx);
>  			lan966x_fdma_rx_advance_dcb(rx);
>  			goto allocate_new;
> +		case FDMA_TX:
> +			lan966x_fdma_rx_advance_dcb(rx);
> +			continue;
>  		case FDMA_DROP:
>  			lan966x_fdma_rx_free_page(rx);
>  			lan966x_fdma_rx_advance_dcb(rx);
> @@ -670,6 +679,62 @@ static void lan966x_fdma_tx_start(struct lan966x_tx *tx, int next_to_use)
>  	tx->last_in_use = next_to_use;
>  }
>  
> +int lan966x_fdma_xmit_xdpf(struct lan966x_port *port,
> +			   struct xdp_frame *xdpf,
> +			   struct page *page)
> +{
> +	struct lan966x *lan966x = port->lan966x;
> +	struct lan966x_tx_dcb_buf *next_dcb_buf;
> +	struct lan966x_tx *tx = &lan966x->tx;
> +	dma_addr_t dma_addr;
> +	int next_to_use;
> +	__be32 *ifh;
> +	int ret = 0;
> +
> +	spin_lock(&lan966x->tx_lock);
> +
> +	/* Get next index */
> +	next_to_use = lan966x_fdma_get_next_dcb(tx);
> +	if (next_to_use < 0) {
> +		netif_stop_queue(port->dev);
> +		ret = NETDEV_TX_BUSY;
> +		goto out;
> +	}
> +
> +	/* Generate new IFH */
> +	ifh = page_address(page) + XDP_PACKET_HEADROOM;
> +	memset(ifh, 0x0, sizeof(__be32) * IFH_LEN);
> +	lan966x_ifh_set_bypass(ifh, 1);
> +	lan966x_ifh_set_port(ifh, BIT_ULL(port->chip_port));
> +
> +	dma_addr = page_pool_get_dma_addr(page);
> +	dma_sync_single_for_device(lan966x->dev, dma_addr + XDP_PACKET_HEADROOM,
> +				   xdpf->len + IFH_LEN_BYTES,
> +				   DMA_TO_DEVICE);
> +
> +	/* Setup next dcb */
> +	lan966x_fdma_tx_setup_dcb(tx, next_to_use, xdpf->len + IFH_LEN_BYTES,
> +				  dma_addr + XDP_PACKET_HEADROOM);
> +
> +	/* Fill up the buffer */
> +	next_dcb_buf = &tx->dcbs_buf[next_to_use];
> +	next_dcb_buf->skb = NULL;
> +	next_dcb_buf->xdpf = xdpf;
> +	next_dcb_buf->len = xdpf->len + IFH_LEN_BYTES;
> +	next_dcb_buf->dma_addr = dma_addr;
> +	next_dcb_buf->used = true;
> +	next_dcb_buf->ptp = false;
> +	next_dcb_buf->dev = port->dev;
> +
> +	/* Start the transmission */
> +	lan966x_fdma_tx_start(tx, next_to_use);
> +
> +out:
> +	spin_unlock(&lan966x->tx_lock);
> +
> +	return ret;
> +}
> +
>  int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev)
>  {
>  	struct lan966x_port *port = netdev_priv(dev);
> @@ -726,6 +791,7 @@ int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev)
>  	/* Fill up the buffer */
>  	next_dcb_buf = &tx->dcbs_buf[next_to_use];
>  	next_dcb_buf->skb = skb;
> +	next_dcb_buf->xdpf = NULL;
>  	next_dcb_buf->len = skb->len;
>  	next_dcb_buf->dma_addr = dma_addr;
>  	next_dcb_buf->used = true;
> diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
> index 42be5d0f1f015..0b7707306da26 100644
> --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
> +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
> @@ -302,13 +302,13 @@ static int lan966x_port_ifh_xmit(struct sk_buff *skb,
>  	return NETDEV_TX_BUSY;
>  }
>  
> -static void lan966x_ifh_set_bypass(void *ifh, u64 bypass)
> +void lan966x_ifh_set_bypass(void *ifh, u64 bypass)
>  {
>  	packing(ifh, &bypass, IFH_POS_BYPASS + IFH_WID_BYPASS - 1,
>  		IFH_POS_BYPASS, IFH_LEN * 4, PACK, 0);
>  }
>  
> -static void lan966x_ifh_set_port(void *ifh, u64 bypass)
> +void lan966x_ifh_set_port(void *ifh, u64 bypass)
>  {
>  	packing(ifh, &bypass, IFH_POS_DSTS + IFH_WID_DSTS - 1,
>  		IFH_POS_DSTS, IFH_LEN * 4, PACK, 0);
> diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
> index 81c0b11097ce2..ce8b2eb13a9aa 100644
> --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
> +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
> @@ -105,11 +105,13 @@ enum macaccess_entry_type {
>   * FDMA_PASS, frame is valid and can be used
>   * FDMA_ERROR, something went wrong, stop getting more frames
>   * FDMA_DROP, frame is dropped, but continue to get more frames
> + * FDMA_TX, frame is given to TX, but continue to get more frames
>   */
>  enum lan966x_fdma_action {
>  	FDMA_PASS = 0,
>  	FDMA_ERROR,
>  	FDMA_DROP,
> +	FDMA_TX,
>  };
>  
>  struct lan966x_port;
> @@ -176,6 +178,7 @@ struct lan966x_tx_dcb_buf {
>  	dma_addr_t dma_addr;
>  	struct net_device *dev;
>  	struct sk_buff *skb;
> +	struct xdp_frame *xdpf;

Couldn't you make a union out of skb and xdpf? I'd say these two are
mutually exclusive, no? I believe this would simplify some things.

>  	u32 len;
>  	u32 used : 1;
>  	u32 ptp : 1;
> @@ -360,6 +363,8 @@ bool lan966x_hw_offload(struct lan966x *lan966x, u32 port, struct sk_buff *skb);
>  
>  void lan966x_ifh_get_src_port(void *ifh, u64 *src_port);
>  void lan966x_ifh_get_timestamp(void *ifh, u64 *timestamp);
> +void lan966x_ifh_set_bypass(void *ifh, u64 bypass);
> +void lan966x_ifh_set_port(void *ifh, u64 bypass);
>  
>  void lan966x_stats_get(struct net_device *dev,
>  		       struct rtnl_link_stats64 *stats);
> @@ -460,6 +465,9 @@ u32 lan966x_ptp_get_period_ps(void);
>  int lan966x_ptp_gettime64(struct ptp_clock_info *ptp, struct timespec64 *ts);
>  
>  int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev);
> +int lan966x_fdma_xmit_xdpf(struct lan966x_port *port,
> +			   struct xdp_frame *frame,
> +			   struct page *page);
>  int lan966x_fdma_change_mtu(struct lan966x *lan966x);
>  void lan966x_fdma_netdev_init(struct lan966x *lan966x, struct net_device *dev);
>  void lan966x_fdma_netdev_deinit(struct lan966x *lan966x, struct net_device *dev);
> diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c b/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c
> index a99657154cca4..e7998fef7048c 100644
> --- a/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c
> +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c
> @@ -54,6 +54,7 @@ int lan966x_xdp_run(struct lan966x_port *port, struct page *page, u32 data_len)
>  {
>  	struct bpf_prog *xdp_prog = port->xdp_prog;
>  	struct lan966x *lan966x = port->lan966x;
> +	struct xdp_frame *xdpf;
>  	struct xdp_buff xdp;
>  	u32 act;
>  
> @@ -66,6 +67,13 @@ int lan966x_xdp_run(struct lan966x_port *port, struct page *page, u32 data_len)
>  	switch (act) {
>  	case XDP_PASS:
>  		return FDMA_PASS;
> +	case XDP_TX:
> +		xdpf = xdp_convert_buff_to_frame(&xdp);
> +		if (!xdpf)
> +			return FDMA_DROP;

I would generally challenge the need for an xdp_frame in the XDP_TX path.
You would probably be fine calling page_pool_put_full_page() directly on
the cleaning side. This frame is not going to be redirected, so I don't
see the need for carrying additional info. I'm bringing this up because I
observed a performance improvement on the ice driver when I decided to
operate directly on xdp_buff for XDP_TX.

But it's of course up to you.

> +
> +		return lan966x_fdma_xmit_xdpf(port, xdpf, page) ?
> +		       FDMA_DROP : FDMA_TX;
>  	default:
>  		bpf_warn_invalid_xdp_action(port->dev, xdp_prog, act);
>  		fallthrough;
> -- 
> 2.38.0
>
  
Horatiu Vultur Nov. 23, 2022, 8:19 p.m. UTC | #2
The 11/22/2022 23:27, Maciej Fijalkowski wrote:
> 
> On Tue, Nov 22, 2022 at 10:44:12PM +0100, Horatiu Vultur wrote:
> > Extend lan966x XDP support with the action XDP_TX. In this case when the
> > received buffer needs to execute XDP_TX, the buffer will be moved to the
> > TX buffers. So a new RX buffer will be allocated.
> > When the TX finish with the frame, it would give back the buffer to the
> > page pool.
> >
> > Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
> > ---
...
> >
> >  struct lan966x_port;
> > @@ -176,6 +178,7 @@ struct lan966x_tx_dcb_buf {
> >       dma_addr_t dma_addr;
> >       struct net_device *dev;
> >       struct sk_buff *skb;
> > +     struct xdp_frame *xdpf;
> 
> Couldn't you make an union out of skb and xdpf? I'd say these two are
> mutually exclusive, no? I believe this would simplify some things.

Yes, skb and xdpf are mutually exclusive.
Alexander Lobakin also mentioned something similar, and I was not sure.
Now that I have tried it, I can see that it makes it clearer that skb and
xdpf are mutually exclusive, and it also reduces the size of the struct.
So I will update this in the next series.

> 
> >       u32 len;
> >       u32 used : 1;
> >       u32 ptp : 1;
> > @@ -360,6 +363,8 @@ bool lan966x_hw_offload(struct lan966x *lan966x, u32 port, struct sk_buff *skb);
> >
> >  void lan966x_ifh_get_src_port(void *ifh, u64 *src_port);
> >  void lan966x_ifh_get_timestamp(void *ifh, u64 *timestamp);
> > +void lan966x_ifh_set_bypass(void *ifh, u64 bypass);
> > +void lan966x_ifh_set_port(void *ifh, u64 bypass);
> >
> >  void lan966x_stats_get(struct net_device *dev,
> >                      struct rtnl_link_stats64 *stats);
> > @@ -460,6 +465,9 @@ u32 lan966x_ptp_get_period_ps(void);
> >  int lan966x_ptp_gettime64(struct ptp_clock_info *ptp, struct timespec64 *ts);
> >
> >  int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev);
> > +int lan966x_fdma_xmit_xdpf(struct lan966x_port *port,
> > +                        struct xdp_frame *frame,
> > +                        struct page *page);
> >  int lan966x_fdma_change_mtu(struct lan966x *lan966x);
> >  void lan966x_fdma_netdev_init(struct lan966x *lan966x, struct net_device *dev);
> >  void lan966x_fdma_netdev_deinit(struct lan966x *lan966x, struct net_device *dev);
> > diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c b/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c
> > index a99657154cca4..e7998fef7048c 100644
> > --- a/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c
> > +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c
> > @@ -54,6 +54,7 @@ int lan966x_xdp_run(struct lan966x_port *port, struct page *page, u32 data_len)
> >  {
> >       struct bpf_prog *xdp_prog = port->xdp_prog;
> >       struct lan966x *lan966x = port->lan966x;
> > +     struct xdp_frame *xdpf;
> >       struct xdp_buff xdp;
> >       u32 act;
> >
> > @@ -66,6 +67,13 @@ int lan966x_xdp_run(struct lan966x_port *port, struct page *page, u32 data_len)
> >       switch (act) {
> >       case XDP_PASS:
> >               return FDMA_PASS;
> > +     case XDP_TX:
> > +             xdpf = xdp_convert_buff_to_frame(&xdp);
> > +             if (!xdpf)
> > +                     return FDMA_DROP;
> 
> I would generally challenge the need for xdp_frame in XDP_TX path. You
> probably would be good to go with calling directly
> page_pool_put_full_page() on cleaning side. This frame is not going to be
> redirected so I don't see the need for carrying additional info. I'm
> bringing this up as I was observing performance improvement on ice driver
> when I decided to operate directly on xdp_buff for XDP_TX.

Thanks for the suggestion. I definitely see your point.
I would prefer to keep this as it is for now, because I think in the
near future I should do a proper investigation to see where the
performance of the FDMA can be improved. And this will
definitely be on the TODO list.
> 
> But it's of course up to you.

> 
> > +
> > +             return lan966x_fdma_xmit_xdpf(port, xdpf, page) ?
> > +                    FDMA_DROP : FDMA_TX;
> >       default:
> >               bpf_warn_invalid_xdp_action(port->dev, xdp_prog, act);
> >               fallthrough;
> > --
> > 2.38.0
> >
  

Patch

diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
index f8287a6a86ed5..23e1cad0f5d37 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
@@ -411,12 +411,18 @@  static void lan966x_fdma_tx_clear_buf(struct lan966x *lan966x, int weight)
 		dcb_buf->dev->stats.tx_bytes += dcb_buf->len;
 
 		dcb_buf->used = false;
-		dma_unmap_single(lan966x->dev,
-				 dcb_buf->dma_addr,
-				 dcb_buf->len,
-				 DMA_TO_DEVICE);
-		if (!dcb_buf->ptp)
-			dev_kfree_skb_any(dcb_buf->skb);
+		if (dcb_buf->skb) {
+			dma_unmap_single(lan966x->dev,
+					 dcb_buf->dma_addr,
+					 dcb_buf->len,
+					 DMA_TO_DEVICE);
+
+			if (!dcb_buf->ptp)
+				napi_consume_skb(dcb_buf->skb, weight);
+		}
+
+		if (dcb_buf->xdpf)
+			xdp_return_frame_rx_napi(dcb_buf->xdpf);
 
 		clear = true;
 	}
@@ -549,6 +555,9 @@  static int lan966x_fdma_napi_poll(struct napi_struct *napi, int weight)
 			lan966x_fdma_rx_free_page(rx);
 			lan966x_fdma_rx_advance_dcb(rx);
 			goto allocate_new;
+		case FDMA_TX:
+			lan966x_fdma_rx_advance_dcb(rx);
+			continue;
 		case FDMA_DROP:
 			lan966x_fdma_rx_free_page(rx);
 			lan966x_fdma_rx_advance_dcb(rx);
@@ -670,6 +679,62 @@  static void lan966x_fdma_tx_start(struct lan966x_tx *tx, int next_to_use)
 	tx->last_in_use = next_to_use;
 }
 
+int lan966x_fdma_xmit_xdpf(struct lan966x_port *port,
+			   struct xdp_frame *xdpf,
+			   struct page *page)
+{
+	struct lan966x *lan966x = port->lan966x;
+	struct lan966x_tx_dcb_buf *next_dcb_buf;
+	struct lan966x_tx *tx = &lan966x->tx;
+	dma_addr_t dma_addr;
+	int next_to_use;
+	__be32 *ifh;
+	int ret = 0;
+
+	spin_lock(&lan966x->tx_lock);
+
+	/* Get next index */
+	next_to_use = lan966x_fdma_get_next_dcb(tx);
+	if (next_to_use < 0) {
+		netif_stop_queue(port->dev);
+		ret = NETDEV_TX_BUSY;
+		goto out;
+	}
+
+	/* Generate new IFH */
+	ifh = page_address(page) + XDP_PACKET_HEADROOM;
+	memset(ifh, 0x0, sizeof(__be32) * IFH_LEN);
+	lan966x_ifh_set_bypass(ifh, 1);
+	lan966x_ifh_set_port(ifh, BIT_ULL(port->chip_port));
+
+	dma_addr = page_pool_get_dma_addr(page);
+	dma_sync_single_for_device(lan966x->dev, dma_addr + XDP_PACKET_HEADROOM,
+				   xdpf->len + IFH_LEN_BYTES,
+				   DMA_TO_DEVICE);
+
+	/* Setup next dcb */
+	lan966x_fdma_tx_setup_dcb(tx, next_to_use, xdpf->len + IFH_LEN_BYTES,
+				  dma_addr + XDP_PACKET_HEADROOM);
+
+	/* Fill up the buffer */
+	next_dcb_buf = &tx->dcbs_buf[next_to_use];
+	next_dcb_buf->skb = NULL;
+	next_dcb_buf->xdpf = xdpf;
+	next_dcb_buf->len = xdpf->len + IFH_LEN_BYTES;
+	next_dcb_buf->dma_addr = dma_addr;
+	next_dcb_buf->used = true;
+	next_dcb_buf->ptp = false;
+	next_dcb_buf->dev = port->dev;
+
+	/* Start the transmission */
+	lan966x_fdma_tx_start(tx, next_to_use);
+
+out:
+	spin_unlock(&lan966x->tx_lock);
+
+	return ret;
+}
+
 int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev)
 {
 	struct lan966x_port *port = netdev_priv(dev);
@@ -726,6 +791,7 @@  int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev)
 	/* Fill up the buffer */
 	next_dcb_buf = &tx->dcbs_buf[next_to_use];
 	next_dcb_buf->skb = skb;
+	next_dcb_buf->xdpf = NULL;
 	next_dcb_buf->len = skb->len;
 	next_dcb_buf->dma_addr = dma_addr;
 	next_dcb_buf->used = true;
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
index 42be5d0f1f015..0b7707306da26 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
@@ -302,13 +302,13 @@  static int lan966x_port_ifh_xmit(struct sk_buff *skb,
 	return NETDEV_TX_BUSY;
 }
 
-static void lan966x_ifh_set_bypass(void *ifh, u64 bypass)
+void lan966x_ifh_set_bypass(void *ifh, u64 bypass)
 {
 	packing(ifh, &bypass, IFH_POS_BYPASS + IFH_WID_BYPASS - 1,
 		IFH_POS_BYPASS, IFH_LEN * 4, PACK, 0);
 }
 
-static void lan966x_ifh_set_port(void *ifh, u64 bypass)
+void lan966x_ifh_set_port(void *ifh, u64 bypass)
 {
 	packing(ifh, &bypass, IFH_POS_DSTS + IFH_WID_DSTS - 1,
 		IFH_POS_DSTS, IFH_LEN * 4, PACK, 0);
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
index 81c0b11097ce2..ce8b2eb13a9aa 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
@@ -105,11 +105,13 @@  enum macaccess_entry_type {
  * FDMA_PASS, frame is valid and can be used
  * FDMA_ERROR, something went wrong, stop getting more frames
  * FDMA_DROP, frame is dropped, but continue to get more frames
+ * FDMA_TX, frame is given to TX, but continue to get more frames
  */
 enum lan966x_fdma_action {
 	FDMA_PASS = 0,
 	FDMA_ERROR,
 	FDMA_DROP,
+	FDMA_TX,
 };
 
 struct lan966x_port;
@@ -176,6 +178,7 @@  struct lan966x_tx_dcb_buf {
 	dma_addr_t dma_addr;
 	struct net_device *dev;
 	struct sk_buff *skb;
+	struct xdp_frame *xdpf;
 	u32 len;
 	u32 used : 1;
 	u32 ptp : 1;
@@ -360,6 +363,8 @@  bool lan966x_hw_offload(struct lan966x *lan966x, u32 port, struct sk_buff *skb);
 
 void lan966x_ifh_get_src_port(void *ifh, u64 *src_port);
 void lan966x_ifh_get_timestamp(void *ifh, u64 *timestamp);
+void lan966x_ifh_set_bypass(void *ifh, u64 bypass);
+void lan966x_ifh_set_port(void *ifh, u64 bypass);
 
 void lan966x_stats_get(struct net_device *dev,
 		       struct rtnl_link_stats64 *stats);
@@ -460,6 +465,9 @@  u32 lan966x_ptp_get_period_ps(void);
 int lan966x_ptp_gettime64(struct ptp_clock_info *ptp, struct timespec64 *ts);
 
 int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev);
+int lan966x_fdma_xmit_xdpf(struct lan966x_port *port,
+			   struct xdp_frame *frame,
+			   struct page *page);
 int lan966x_fdma_change_mtu(struct lan966x *lan966x);
 void lan966x_fdma_netdev_init(struct lan966x *lan966x, struct net_device *dev);
 void lan966x_fdma_netdev_deinit(struct lan966x *lan966x, struct net_device *dev);
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c b/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c
index a99657154cca4..e7998fef7048c 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c
@@ -54,6 +54,7 @@  int lan966x_xdp_run(struct lan966x_port *port, struct page *page, u32 data_len)
 {
 	struct bpf_prog *xdp_prog = port->xdp_prog;
 	struct lan966x *lan966x = port->lan966x;
+	struct xdp_frame *xdpf;
 	struct xdp_buff xdp;
 	u32 act;
 
@@ -66,6 +67,13 @@  int lan966x_xdp_run(struct lan966x_port *port, struct page *page, u32 data_len)
 	switch (act) {
 	case XDP_PASS:
 		return FDMA_PASS;
+	case XDP_TX:
+		xdpf = xdp_convert_buff_to_frame(&xdp);
+		if (!xdpf)
+			return FDMA_DROP;
+
+		return lan966x_fdma_xmit_xdpf(port, xdpf, page) ?
+		       FDMA_DROP : FDMA_TX;
 	default:
 		bpf_warn_invalid_xdp_action(port->dev, xdp_prog, act);
 		fallthrough;