[net-next,v4,1/3] net: stmmac: introduce wrapper for struct xdp_buff

Message ID 20230413032541.885238-2-yoong.siang.song@intel.com
State New
Headers
Series XDP Rx HWTS metadata for stmmac driver |

Commit Message

Song Yoong Siang April 13, 2023, 3:25 a.m. UTC
  Introduce struct stmmac_xdp_buff as a preparation to support XDP Rx
metadata via kfuncs.

Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Song Yoong Siang <yoong.siang.song@intel.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac.h   |  4 ++++
 .../net/ethernet/stmicro/stmmac/stmmac_main.c  | 18 +++++++++---------
 2 files changed, 13 insertions(+), 9 deletions(-)
  

Comments

Jesper Dangaard Brouer April 13, 2023, 5:09 p.m. UTC | #1
On 13/04/2023 05.25, Song Yoong Siang wrote:
> Introduce struct stmmac_xdp_buff as a preparation to support XDP Rx
> metadata via kfuncs.
> 
> Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
> Signed-off-by: Song Yoong Siang <yoong.siang.song@intel.com>
> ---
>   drivers/net/ethernet/stmicro/stmmac/stmmac.h   |  4 ++++
>   .../net/ethernet/stmicro/stmmac/stmmac_main.c  | 18 +++++++++---------
>   2 files changed, 13 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
> index 3d15e1e92e18..ac8ccf851708 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
> @@ -92,6 +92,10 @@ struct stmmac_rx_buffer {
>   	dma_addr_t sec_addr;
>   };
>   
> +struct stmmac_xdp_buff {
> +	struct xdp_buff xdp;
> +};
> +
>   struct stmmac_rx_queue {
>   	u32 rx_count_frames;
>   	u32 queue_index;
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> index d7fcab057032..6ffce52ca837 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> @@ -5188,9 +5188,9 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
>   	int status = 0, coe = priv->hw->rx_csum;
>   	unsigned int next_entry = rx_q->cur_rx;
>   	enum dma_data_direction dma_dir;
> +	struct stmmac_xdp_buff ctx = {};

This code trick {} will zero out the struct.

Is this done on purpose, and is it really needed?

On x86_64 this unfortunately generates an asm instruction: rep stos

A repeated store string operation, for zeroing out memory, which is
slow. (Because[1] it supports being suspended by an exception or interrupt,
which requires it to store/restore CPU flags).

[1] https://www.felixcloutier.com/x86/rep:repe:repz:repne:repnz#tbl-4-22


>   	unsigned int desc_size;
>   	struct sk_buff *skb = NULL;
> -	struct xdp_buff xdp;
>   	int xdp_status = 0;
>   	int buf_sz;
>   
> @@ -5311,17 +5311,17 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
>   			dma_sync_single_for_cpu(priv->device, buf->addr,
>   						buf1_len, dma_dir);
>   
> -			xdp_init_buff(&xdp, buf_sz, &rx_q->xdp_rxq);
> -			xdp_prepare_buff(&xdp, page_address(buf->page),
> +			xdp_init_buff(&ctx.xdp, buf_sz, &rx_q->xdp_rxq);
> +			xdp_prepare_buff(&ctx.xdp, page_address(buf->page),
>   					 buf->page_offset, buf1_len, false);
>   
> -			pre_len = xdp.data_end - xdp.data_hard_start -
> +			pre_len = ctx.xdp.data_end - ctx.xdp.data_hard_start -
>   				  buf->page_offset;
> -			skb = stmmac_xdp_run_prog(priv, &xdp);
> +			skb = stmmac_xdp_run_prog(priv, &ctx.xdp);
>   			/* Due xdp_adjust_tail: DMA sync for_device
>   			 * cover max len CPU touch
>   			 */
> -			sync_len = xdp.data_end - xdp.data_hard_start -
> +			sync_len = ctx.xdp.data_end - ctx.xdp.data_hard_start -
>   				   buf->page_offset;
>   			sync_len = max(sync_len, pre_len);
>   
> @@ -5331,7 +5331,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
>   
>   				if (xdp_res & STMMAC_XDP_CONSUMED) {
>   					page_pool_put_page(rx_q->page_pool,
> -							   virt_to_head_page(xdp.data),
> +							   virt_to_head_page(ctx.xdp.data),
>   							   sync_len, true);
>   					buf->page = NULL;
>   					priv->dev->stats.rx_dropped++;
> @@ -5359,7 +5359,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
>   
>   		if (!skb) {
>   			/* XDP program may expand or reduce tail */
> -			buf1_len = xdp.data_end - xdp.data;
> +			buf1_len = ctx.xdp.data_end - ctx.xdp.data;
>   
>   			skb = napi_alloc_skb(&ch->rx_napi, buf1_len);
>   			if (!skb) {
> @@ -5369,7 +5369,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
>   			}
>   
>   			/* XDP program may adjust header */
> -			skb_copy_to_linear_data(skb, xdp.data, buf1_len);
> +			skb_copy_to_linear_data(skb, ctx.xdp.data, buf1_len);
>   			skb_put(skb, buf1_len);
>   
>   			/* Data payload copied into SKB, page ready for recycle */
  
Song Yoong Siang April 14, 2023, 1:10 a.m. UTC | #2
On Friday, April 14, 2023 1:10 AM , Jesper Dangaard Brouer <jbrouer@redhat.com> wrote:
>On 13/04/2023 05.25, Song Yoong Siang wrote:
>> Introduce struct stmmac_xdp_buff as a preparation to support XDP Rx
>> metadata via kfuncs.
>>
>> Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
>> Signed-off-by: Song Yoong Siang <yoong.siang.song@intel.com>
>> ---
>>   drivers/net/ethernet/stmicro/stmmac/stmmac.h   |  4 ++++
>>   .../net/ethernet/stmicro/stmmac/stmmac_main.c  | 18 +++++++++---------
>>   2 files changed, 13 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
>> b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
>> index 3d15e1e92e18..ac8ccf851708 100644
>> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
>> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
>> @@ -92,6 +92,10 @@ struct stmmac_rx_buffer {
>>   	dma_addr_t sec_addr;
>>   };
>>
>> +struct stmmac_xdp_buff {
>> +	struct xdp_buff xdp;
>> +};
>> +
>>   struct stmmac_rx_queue {
>>   	u32 rx_count_frames;
>>   	u32 queue_index;
>> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
>> b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
>> index d7fcab057032..6ffce52ca837 100644
>> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
>> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
>> @@ -5188,9 +5188,9 @@ static int stmmac_rx(struct stmmac_priv *priv, int
>limit, u32 queue)
>>   	int status = 0, coe = priv->hw->rx_csum;
>>   	unsigned int next_entry = rx_q->cur_rx;
>>   	enum dma_data_direction dma_dir;
>> +	struct stmmac_xdp_buff ctx = {};
>
>This code trick {} will zero out the struct.
>
>Is this done on purpose, and is it really needed?
>
>On x86_64 this unfortunately generates an asm instruction: rep stos
>
>A repeated store string operation, for zeroing out memory, which is slow.
>(Because[1] it supports being suspended by an exception or interrupt, which requires
>it to store/restore CPU flags).
>
>[1] https://www.felixcloutier.com/x86/rep:repe:repz:repne:repnz#tbl-4-22

Thanks for the useful information and link. I will submit v5 to remove {}.
>
>
>>   	unsigned int desc_size;
>>   	struct sk_buff *skb = NULL;
>> -	struct xdp_buff xdp;
>>   	int xdp_status = 0;
>>   	int buf_sz;
>>
>> @@ -5311,17 +5311,17 @@ static int stmmac_rx(struct stmmac_priv *priv, int
>limit, u32 queue)
>>   			dma_sync_single_for_cpu(priv->device, buf->addr,
>>   						buf1_len, dma_dir);
>>
>> -			xdp_init_buff(&xdp, buf_sz, &rx_q->xdp_rxq);
>> -			xdp_prepare_buff(&xdp, page_address(buf->page),
>> +			xdp_init_buff(&ctx.xdp, buf_sz, &rx_q->xdp_rxq);
>> +			xdp_prepare_buff(&ctx.xdp, page_address(buf->page),
>>   					 buf->page_offset, buf1_len, false);
>>
>> -			pre_len = xdp.data_end - xdp.data_hard_start -
>> +			pre_len = ctx.xdp.data_end - ctx.xdp.data_hard_start -
>>   				  buf->page_offset;
>> -			skb = stmmac_xdp_run_prog(priv, &xdp);
>> +			skb = stmmac_xdp_run_prog(priv, &ctx.xdp);
>>   			/* Due xdp_adjust_tail: DMA sync for_device
>>   			 * cover max len CPU touch
>>   			 */
>> -			sync_len = xdp.data_end - xdp.data_hard_start -
>> +			sync_len = ctx.xdp.data_end - ctx.xdp.data_hard_start -
>>   				   buf->page_offset;
>>   			sync_len = max(sync_len, pre_len);
>>
>> @@ -5331,7 +5331,7 @@ static int stmmac_rx(struct stmmac_priv *priv,
>> int limit, u32 queue)
>>
>>   				if (xdp_res & STMMAC_XDP_CONSUMED) {
>>   					page_pool_put_page(rx_q->page_pool,
>> -
>virt_to_head_page(xdp.data),
>> +
>virt_to_head_page(ctx.xdp.data),
>>   							   sync_len, true);
>>   					buf->page = NULL;
>>   					priv->dev->stats.rx_dropped++;
>> @@ -5359,7 +5359,7 @@ static int stmmac_rx(struct stmmac_priv *priv,
>> int limit, u32 queue)
>>
>>   		if (!skb) {
>>   			/* XDP program may expand or reduce tail */
>> -			buf1_len = xdp.data_end - xdp.data;
>> +			buf1_len = ctx.xdp.data_end - ctx.xdp.data;
>>
>>   			skb = napi_alloc_skb(&ch->rx_napi, buf1_len);
>>   			if (!skb) {
>> @@ -5369,7 +5369,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int
>limit, u32 queue)
>>   			}
>>
>>   			/* XDP program may adjust header */
>> -			skb_copy_to_linear_data(skb, xdp.data, buf1_len);
>> +			skb_copy_to_linear_data(skb, ctx.xdp.data, buf1_len);
>>   			skb_put(skb, buf1_len);
>>
>>   			/* Data payload copied into SKB, page ready for recycle
>*/
  

Patch

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 3d15e1e92e18..ac8ccf851708 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -92,6 +92,10 @@  struct stmmac_rx_buffer {
 	dma_addr_t sec_addr;
 };
 
+struct stmmac_xdp_buff {
+	struct xdp_buff xdp;
+};
+
 struct stmmac_rx_queue {
 	u32 rx_count_frames;
 	u32 queue_index;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index d7fcab057032..6ffce52ca837 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -5188,9 +5188,9 @@  static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 	int status = 0, coe = priv->hw->rx_csum;
 	unsigned int next_entry = rx_q->cur_rx;
 	enum dma_data_direction dma_dir;
+	struct stmmac_xdp_buff ctx = {};
 	unsigned int desc_size;
 	struct sk_buff *skb = NULL;
-	struct xdp_buff xdp;
 	int xdp_status = 0;
 	int buf_sz;
 
@@ -5311,17 +5311,17 @@  static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 			dma_sync_single_for_cpu(priv->device, buf->addr,
 						buf1_len, dma_dir);
 
-			xdp_init_buff(&xdp, buf_sz, &rx_q->xdp_rxq);
-			xdp_prepare_buff(&xdp, page_address(buf->page),
+			xdp_init_buff(&ctx.xdp, buf_sz, &rx_q->xdp_rxq);
+			xdp_prepare_buff(&ctx.xdp, page_address(buf->page),
 					 buf->page_offset, buf1_len, false);
 
-			pre_len = xdp.data_end - xdp.data_hard_start -
+			pre_len = ctx.xdp.data_end - ctx.xdp.data_hard_start -
 				  buf->page_offset;
-			skb = stmmac_xdp_run_prog(priv, &xdp);
+			skb = stmmac_xdp_run_prog(priv, &ctx.xdp);
 			/* Due xdp_adjust_tail: DMA sync for_device
 			 * cover max len CPU touch
 			 */
-			sync_len = xdp.data_end - xdp.data_hard_start -
+			sync_len = ctx.xdp.data_end - ctx.xdp.data_hard_start -
 				   buf->page_offset;
 			sync_len = max(sync_len, pre_len);
 
@@ -5331,7 +5331,7 @@  static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 
 				if (xdp_res & STMMAC_XDP_CONSUMED) {
 					page_pool_put_page(rx_q->page_pool,
-							   virt_to_head_page(xdp.data),
+							   virt_to_head_page(ctx.xdp.data),
 							   sync_len, true);
 					buf->page = NULL;
 					priv->dev->stats.rx_dropped++;
@@ -5359,7 +5359,7 @@  static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 
 		if (!skb) {
 			/* XDP program may expand or reduce tail */
-			buf1_len = xdp.data_end - xdp.data;
+			buf1_len = ctx.xdp.data_end - ctx.xdp.data;
 
 			skb = napi_alloc_skb(&ch->rx_napi, buf1_len);
 			if (!skb) {
@@ -5369,7 +5369,7 @@  static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 			}
 
 			/* XDP program may adjust header */
-			skb_copy_to_linear_data(skb, xdp.data, buf1_len);
+			skb_copy_to_linear_data(skb, ctx.xdp.data, buf1_len);
 			skb_put(skb, buf1_len);
 
 			/* Data payload copied into SKB, page ready for recycle */