[RFC,v4,03/17] vsock/virtio: support to send non-linear skb

Message ID 20230603204939.1598818-4-AVKrasnov@sberdevices.ru
State New
Headers
Series vsock: MSG_ZEROCOPY flag support |

Commit Message

Arseniy Krasnov June 3, 2023, 8:49 p.m. UTC
  For a non-linear skb, use the pages from its fragment array as buffers in
the virtio tx queue. These pages are already pinned by 'get_user_pages()'
when such an skb is created.

Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
---
 net/vmw_vsock/virtio_transport.c | 37 ++++++++++++++++++++++++++------
 1 file changed, 31 insertions(+), 6 deletions(-)
  

Comments

Bobby Eshleman June 12, 2023, 6:30 p.m. UTC | #1
On Sat, Jun 03, 2023 at 11:49:25PM +0300, Arseniy Krasnov wrote:
> For non-linear skb use its pages from fragment array as buffers in
> virtio tx queue. These pages are already pinned by 'get_user_pages()'
> during such skb creation.
> 
> Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
> ---
>  net/vmw_vsock/virtio_transport.c | 37 ++++++++++++++++++++++++++------
>  1 file changed, 31 insertions(+), 6 deletions(-)
> 
> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
> index e95df847176b..6053d8341091 100644
> --- a/net/vmw_vsock/virtio_transport.c
> +++ b/net/vmw_vsock/virtio_transport.c
> @@ -100,7 +100,9 @@ virtio_transport_send_pkt_work(struct work_struct *work)
>  	vq = vsock->vqs[VSOCK_VQ_TX];
>  
>  	for (;;) {
> -		struct scatterlist hdr, buf, *sgs[2];
> +		/* +1 is for packet header. */
> +		struct scatterlist *sgs[MAX_SKB_FRAGS + 1];
> +		struct scatterlist bufs[MAX_SKB_FRAGS + 1];
>  		int ret, in_sg = 0, out_sg = 0;
>  		struct sk_buff *skb;
>  		bool reply;
> @@ -111,12 +113,35 @@ virtio_transport_send_pkt_work(struct work_struct *work)
>  
>  		virtio_transport_deliver_tap_pkt(skb);
>  		reply = virtio_vsock_skb_reply(skb);
> +		sg_init_one(&bufs[0], virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb)));
> +		sgs[out_sg++] = &bufs[0];
> +
> +		if (skb_is_nonlinear(skb)) {
> +			struct skb_shared_info *si;
> +			int i;
> +
> +			si = skb_shinfo(skb);
> +
> +			for (i = 0; i < si->nr_frags; i++) {
> +				skb_frag_t *skb_frag = &si->frags[i];
> +				void *va = page_to_virt(skb_frag->bv_page);
> +
> +				/* We will use 'page_to_virt()' for userspace page here,
> +				 * because virtio layer will call 'virt_to_phys()' later
> +				 * to fill buffer descriptor. We don't touch memory at
> +				 * "virtual" address of this page.
> +				 */
> +				sg_init_one(&bufs[i + 1],
> +					    va + skb_frag->bv_offset,
> +					    skb_frag->bv_len);
> +				sgs[out_sg++] = &bufs[i + 1];
> +			}
> +		} else {
> +			if (skb->len > 0) {
> +				sg_init_one(&bufs[1], skb->data, skb->len);
> +				sgs[out_sg++] = &bufs[1];
> +			}
>  
> -		sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb)));
> -		sgs[out_sg++] = &hdr;
> -		if (skb->len > 0) {
> -			sg_init_one(&buf, skb->data, skb->len);
> -			sgs[out_sg++] = &buf;
>  		}
>  
>  		ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL);
> -- 
> 2.25.1
> 

LGTM.

Reviewed-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
  
Stefano Garzarella June 26, 2023, 3:36 p.m. UTC | #2
On Sat, Jun 03, 2023 at 11:49:25PM +0300, Arseniy Krasnov wrote:
>For non-linear skb use its pages from fragment array as buffers in
>virtio tx queue. These pages are already pinned by 'get_user_pages()'
>during such skb creation.
>
>Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
>---
> net/vmw_vsock/virtio_transport.c | 37 ++++++++++++++++++++++++++------
> 1 file changed, 31 insertions(+), 6 deletions(-)
>
>diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
>index e95df847176b..6053d8341091 100644
>--- a/net/vmw_vsock/virtio_transport.c
>+++ b/net/vmw_vsock/virtio_transport.c
>@@ -100,7 +100,9 @@ virtio_transport_send_pkt_work(struct work_struct *work)
> 	vq = vsock->vqs[VSOCK_VQ_TX];
>
> 	for (;;) {
>-		struct scatterlist hdr, buf, *sgs[2];
>+		/* +1 is for packet header. */
>+		struct scatterlist *sgs[MAX_SKB_FRAGS + 1];
>+		struct scatterlist bufs[MAX_SKB_FRAGS + 1];
> 		int ret, in_sg = 0, out_sg = 0;
> 		struct sk_buff *skb;
> 		bool reply;
>@@ -111,12 +113,35 @@ virtio_transport_send_pkt_work(struct work_struct *work)
>
> 		virtio_transport_deliver_tap_pkt(skb);
> 		reply = virtio_vsock_skb_reply(skb);
>+		sg_init_one(&bufs[0], virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb)));
>+		sgs[out_sg++] = &bufs[0];

Can we use out_sg also to index bufs (here and in the rest of the code)?

E.g.

		sg_init_one(&bufs[out_sg], ...)
		sgs[out_sg] = &bufs[out_sg];
		++out_sg;

		...
			if (skb->len > 0) {
				sg_init_one(&bufs[out_sg], skb->data, skb->len);
				sgs[out_sg] = &bufs[out_sg];
				++out_sg;
			}

		etc...

>+

For readability, I would move the smaller branch above:

		if (!skb_is_nonlinear(skb)) {
			// small block
			...
		} else {
			// big block
			...
		}

>+		if (skb_is_nonlinear(skb)) {
>+			struct skb_shared_info *si;
>+			int i;
>+
>+			si = skb_shinfo(skb);
>+
>+			for (i = 0; i < si->nr_frags; i++) {
>+				skb_frag_t *skb_frag = &si->frags[i];
>+				void *va = page_to_virt(skb_frag->bv_page);
>+
>+				/* We will use 'page_to_virt()' for userspace page here,
>+				 * because virtio layer will call 'virt_to_phys()' later
>+				 * to fill buffer descriptor. We don't touch memory at
>+				 * "virtual" address of this page.
>+				 */
>+				sg_init_one(&bufs[i + 1],
>+					    va + skb_frag->bv_offset,
>+					    skb_frag->bv_len);
>+				sgs[out_sg++] = &bufs[i + 1];
>+			}
>+		} else {
>+			if (skb->len > 0) {

Should we do the same check (skb->len > 0) for nonlinear skb as well?
Or do the nonlinear ones necessarily have len > 0?

>+				sg_init_one(&bufs[1], skb->data, skb->len);
>+				sgs[out_sg++] = &bufs[1];
>+			}
>
    ^
Blank line that we can remove.

Stefano

>-		sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb)));
>-		sgs[out_sg++] = &hdr;
>-		if (skb->len > 0) {
>-			sg_init_one(&buf, skb->data, skb->len);
>-			sgs[out_sg++] = &buf;
> 		}
>
> 		ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL);
>-- 
>2.25.1
>
  
Arseniy Krasnov June 27, 2023, 4:39 a.m. UTC | #3
On 26.06.2023 18:36, Stefano Garzarella wrote:
> On Sat, Jun 03, 2023 at 11:49:25PM +0300, Arseniy Krasnov wrote:
>> For non-linear skb use its pages from fragment array as buffers in
>> virtio tx queue. These pages are already pinned by 'get_user_pages()'
>> during such skb creation.
>>
>> Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
>> ---
>> net/vmw_vsock/virtio_transport.c | 37 ++++++++++++++++++++++++++------
>> 1 file changed, 31 insertions(+), 6 deletions(-)
>>
>> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
>> index e95df847176b..6053d8341091 100644
>> --- a/net/vmw_vsock/virtio_transport.c
>> +++ b/net/vmw_vsock/virtio_transport.c
>> @@ -100,7 +100,9 @@ virtio_transport_send_pkt_work(struct work_struct *work)
>>     vq = vsock->vqs[VSOCK_VQ_TX];
>>
>>     for (;;) {
>> -        struct scatterlist hdr, buf, *sgs[2];
>> +        /* +1 is for packet header. */
>> +        struct scatterlist *sgs[MAX_SKB_FRAGS + 1];
>> +        struct scatterlist bufs[MAX_SKB_FRAGS + 1];
>>         int ret, in_sg = 0, out_sg = 0;
>>         struct sk_buff *skb;
>>         bool reply;
>> @@ -111,12 +113,35 @@ virtio_transport_send_pkt_work(struct work_struct *work)
>>
>>         virtio_transport_deliver_tap_pkt(skb);
>>         reply = virtio_vsock_skb_reply(skb);
>> +        sg_init_one(&bufs[0], virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb)));
>> +        sgs[out_sg++] = &bufs[0];
> 
> Can we use out_sg also to index bufs (here and in the rest of the code)?
> 
> E.g.
> 
>         sg_init_one(&bufs[out_sg], ...)
>         sgs[out_sg] = &bufs[out_sg];
>         ++out_sg;
> 
>         ...
>             if (skb->len > 0) {
>                 sg_init_one(&bufs[out_sg], skb->data, skb->len);
>                 sgs[out_sg] = &bufs[out_sg];
>                 ++out_sg;
>             }
> 
>         etc...
> 
>> +
> 
> For readability, I would move the smaller branch above:
> 
>         if (!skb_is_nonlinear(skb)) {
>             // small block
>             ...
>         } else {
>             // big block
>             ...
>         }
> 
>> +        if (skb_is_nonlinear(skb)) {
>> +            struct skb_shared_info *si;
>> +            int i;
>> +
>> +            si = skb_shinfo(skb);
>> +
>> +            for (i = 0; i < si->nr_frags; i++) {
>> +                skb_frag_t *skb_frag = &si->frags[i];
>> +                void *va = page_to_virt(skb_frag->bv_page);
>> +
>> +                /* We will use 'page_to_virt()' for userspace page here,
>> +                 * because virtio layer will call 'virt_to_phys()' later
>> +                 * to fill buffer descriptor. We don't touch memory at
>> +                 * "virtual" address of this page.
>> +                 */
>> +                sg_init_one(&bufs[i + 1],
>> +                        va + skb_frag->bv_offset,
>> +                        skb_frag->bv_len);
>> +                sgs[out_sg++] = &bufs[i + 1];
>> +            }
>> +        } else {
>> +            if (skb->len > 0) {
> 
> Should we do the same check (skb->len > 0) for nonlinear skb as well?
> Or do the nonlinear ones necessarily have len > 0?

Yes, a non-linear skb always has 'data_len' > 0, i.e. such skbs always have some
data in them.

Thanks, Arseniy

> 
>> +                sg_init_one(&bufs[1], skb->data, skb->len);
>> +                sgs[out_sg++] = &bufs[1];
>> +            }
>>
>    ^
> Blank line that we can remove.
> 
> Stefano
> 
>> -        sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb)));
>> -        sgs[out_sg++] = &hdr;
>> -        if (skb->len > 0) {
>> -            sg_init_one(&buf, skb->data, skb->len);
>> -            sgs[out_sg++] = &buf;
>>         }
>>
>>         ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL);
>> -- 
>> 2.25.1
>>
>
  
Stefano Garzarella June 27, 2023, 7:49 a.m. UTC | #4
On Tue, Jun 27, 2023 at 07:39:41AM +0300, Arseniy Krasnov wrote:
>
>
>On 26.06.2023 18:36, Stefano Garzarella wrote:
>> On Sat, Jun 03, 2023 at 11:49:25PM +0300, Arseniy Krasnov wrote:
>>> For non-linear skb use its pages from fragment array as buffers in
>>> virtio tx queue. These pages are already pinned by 'get_user_pages()'
>>> during such skb creation.
>>>
>>> Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
>>> ---
>>> net/vmw_vsock/virtio_transport.c | 37 ++++++++++++++++++++++++++------
>>> 1 file changed, 31 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
>>> index e95df847176b..6053d8341091 100644
>>> --- a/net/vmw_vsock/virtio_transport.c
>>> +++ b/net/vmw_vsock/virtio_transport.c
>>> @@ -100,7 +100,9 @@ virtio_transport_send_pkt_work(struct work_struct *work)
>>>     vq = vsock->vqs[VSOCK_VQ_TX];
>>>
>>>     for (;;) {
>>> -        struct scatterlist hdr, buf, *sgs[2];
>>> +        /* +1 is for packet header. */
>>> +        struct scatterlist *sgs[MAX_SKB_FRAGS + 1];
>>> +        struct scatterlist bufs[MAX_SKB_FRAGS + 1];
>>>         int ret, in_sg = 0, out_sg = 0;
>>>         struct sk_buff *skb;
>>>         bool reply;
>>> @@ -111,12 +113,35 @@ virtio_transport_send_pkt_work(struct work_struct *work)
>>>
>>>         virtio_transport_deliver_tap_pkt(skb);
>>>         reply = virtio_vsock_skb_reply(skb);
>>> +        sg_init_one(&bufs[0], virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb)));
>>> +        sgs[out_sg++] = &bufs[0];
>>
>> Can we use out_sg also to index bufs (here and in the rest of the code)?
>>
>> E.g.
>>
>>         sg_init_one(&bufs[out_sg], ...)
>>         sgs[out_sg] = &bufs[out_sg];
>>         ++out_sg;
>>
>>         ...
>>             if (skb->len > 0) {
>>                 sg_init_one(&bufs[out_sg], skb->data, skb->len);
>>                 sgs[out_sg] = &bufs[out_sg];
>>                 ++out_sg;
>>             }
>>
>>         etc...
>>
>>> +
>>
>> For readability, I would move the smaller branch above:
>>
>>         if (!skb_is_nonlinear(skb)) {
>>             // small block
>>             ...
>>         } else {
>>             // big block
>>             ...
>>         }
>>
>>> +        if (skb_is_nonlinear(skb)) {
>>> +            struct skb_shared_info *si;
>>> +            int i;
>>> +
>>> +            si = skb_shinfo(skb);
>>> +
>>> +            for (i = 0; i < si->nr_frags; i++) {
>>> +                skb_frag_t *skb_frag = &si->frags[i];
>>> +                void *va = page_to_virt(skb_frag->bv_page);
>>> +
>>> +                /* We will use 'page_to_virt()' for userspace page here,
>>> +                 * because virtio layer will call 'virt_to_phys()' later
>>> +                 * to fill buffer descriptor. We don't touch memory at
>>> +                 * "virtual" address of this page.
>>> +                 */
>>> +                sg_init_one(&bufs[i + 1],
>>> +                        va + skb_frag->bv_offset,
>>> +                        skb_frag->bv_len);
>>> +                sgs[out_sg++] = &bufs[i + 1];
>>> +            }
>>> +        } else {
>>> +            if (skb->len > 0) {
>>
>> Should we do the same check (skb->len > 0) for nonlinear skb as well?
>> Or do the nonlinear ones necessarily have len > 0?
>
>Yes, a non-linear skb always has 'data_len' > 0, i.e. such skbs always have some
>data in them.

Okay, makes sense ;-)

Thanks,
Stefano

>
>Thanks, Arseniy
>
>>
>>> +                sg_init_one(&bufs[1], skb->data, skb->len);
>>> +                sgs[out_sg++] = &bufs[1];
>>> +            }
>>>
>>    ^
>> Blank line that we can remove.
>>
>> Stefano
>>
>>> -        sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb)));
>>> -        sgs[out_sg++] = &hdr;
>>> -        if (skb->len > 0) {
>>> -            sg_init_one(&buf, skb->data, skb->len);
>>> -            sgs[out_sg++] = &buf;
>>>         }
>>>
>>>         ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL);
>>> -- 
>>> 2.25.1
>>>
>>
>
  

Patch

diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index e95df847176b..6053d8341091 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -100,7 +100,9 @@  virtio_transport_send_pkt_work(struct work_struct *work)
 	vq = vsock->vqs[VSOCK_VQ_TX];
 
 	for (;;) {
-		struct scatterlist hdr, buf, *sgs[2];
+		/* +1 is for packet header. */
+		struct scatterlist *sgs[MAX_SKB_FRAGS + 1];
+		struct scatterlist bufs[MAX_SKB_FRAGS + 1];
 		int ret, in_sg = 0, out_sg = 0;
 		struct sk_buff *skb;
 		bool reply;
@@ -111,12 +113,35 @@  virtio_transport_send_pkt_work(struct work_struct *work)
 
 		virtio_transport_deliver_tap_pkt(skb);
 		reply = virtio_vsock_skb_reply(skb);
+		sg_init_one(&bufs[0], virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb)));
+		sgs[out_sg++] = &bufs[0];
+
+		if (skb_is_nonlinear(skb)) {
+			struct skb_shared_info *si;
+			int i;
+
+			si = skb_shinfo(skb);
+
+			for (i = 0; i < si->nr_frags; i++) {
+				skb_frag_t *skb_frag = &si->frags[i];
+				void *va = page_to_virt(skb_frag->bv_page);
+
+				/* We will use 'page_to_virt()' for userspace page here,
+				 * because virtio layer will call 'virt_to_phys()' later
+				 * to fill buffer descriptor. We don't touch memory at
+				 * "virtual" address of this page.
+				 */
+				sg_init_one(&bufs[i + 1],
+					    va + skb_frag->bv_offset,
+					    skb_frag->bv_len);
+				sgs[out_sg++] = &bufs[i + 1];
+			}
+		} else {
+			if (skb->len > 0) {
+				sg_init_one(&bufs[1], skb->data, skb->len);
+				sgs[out_sg++] = &bufs[1];
+			}
 
-		sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb)));
-		sgs[out_sg++] = &hdr;
-		if (skb->len > 0) {
-			sg_init_one(&buf, skb->data, skb->len);
-			sgs[out_sg++] = &buf;
 		}
 
 		ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL);