[RFC,v5,13/17] vsock: enable setting SO_ZEROCOPY

Message ID 20230701063947.3422088-14-AVKrasnov@sberdevices.ru
State New
Headers
Series vsock: MSG_ZEROCOPY flag support |

Commit Message

Arseniy Krasnov July 1, 2023, 6:39 a.m. UTC
For AF_VSOCK, zerocopy tx mode depends on the transport, so this option
must be set in the AF_VSOCK implementation, where the transport is
accessible. If the transport is not yet assigned when SO_ZEROCOPY is set
(for example, the socket is not connected), SO_ZEROCOPY is enabled anyway,
and support for this type of transmission is checked once the transport
is assigned.

To handle SO_ZEROCOPY, the AF_VSOCK implementation sets the
SOCK_CUSTOM_SOCKOPT bit, so SOL_SOCKET option operations are routed to
it; all of them except SO_ZEROCOPY are forwarded to the generic handler
by calling 'sock_setsockopt()'.

Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
---
 Changelog:
 v4 -> v5:
  * This patch is totally reworked. The previous version added a check
    for PF_VSOCK directly to 'net/core/sock.c', thus allowing
    SO_ZEROCOPY to be set for AF_VSOCK sockets. This new version catches
    the attempt to set SO_ZEROCOPY in 'af_vsock.c'. All other options
    are forwarded to the generic handler; only this option is processed
    in 'af_vsock.c'. Handling this option includes accessing the
    transport to check that MSG_ZEROCOPY transmission is supported by
    the current transport (if the transport is set; if not, it will be
    checked during 'connect()').

 net/vmw_vsock/af_vsock.c | 44 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 42 insertions(+), 2 deletions(-)
  

Comments

Stefano Garzarella July 6, 2023, 4:56 p.m. UTC | #1
On Sat, Jul 01, 2023 at 09:39:43AM +0300, Arseniy Krasnov wrote:
>For AF_VSOCK, zerocopy tx mode depends on transport, so this option must
>be set in AF_VSOCK implementation where transport is accessible (if
>transport is not set during setting SO_ZEROCOPY: for example socket is
>not connected, then SO_ZEROCOPY will be enabled, but once transport will
>be assigned, support of this type of transmission will be checked).
>
>To handle SO_ZEROCOPY, AF_VSOCK implementation uses SOCK_CUSTOM_SOCKOPT
>bit, thus handling SOL_SOCKET option operations, but all of them except
>SO_ZEROCOPY will be forwarded to the generic handler by calling
>'sock_setsockopt()'.
>
>Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
>---
> Changelog:
> v4 -> v5:
>  * This patch is totally reworked. Previous version added check for
>    PF_VSOCK directly to 'net/core/sock.c', thus allowing to set
>    SO_ZEROCOPY for AF_VSOCK type of socket. This new version catches
>    attempt to set SO_ZEROCOPY in 'af_vsock.c'. All other options
>    except SO_ZEROCOPY are forwarded to generic handler. Only this
>    option is processed in 'af_vsock.c'. Handling this option includes
>    access to transport to check that MSG_ZEROCOPY transmission is
>    supported by the current transport (if it is set, if not - transport
>    will be checked during 'connect()').

Yeah, great, this is much better!

>
> net/vmw_vsock/af_vsock.c | 44 ++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 42 insertions(+), 2 deletions(-)
>
>diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>index da22ae0ef477..8acc77981d01 100644
>--- a/net/vmw_vsock/af_vsock.c
>+++ b/net/vmw_vsock/af_vsock.c
>@@ -1406,8 +1406,18 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr,
> 			goto out;
> 		}
>
>-		if (vsock_msgzerocopy_allow(transport))
>+		if (!vsock_msgzerocopy_allow(transport)) {

Can you leave `if (vsock_msgzerocopy_allow(transport))` and just add
the else branch with this new check?

		if (vsock_msgzerocopy_allow(transport)) {
			...
		} else if (sock_flag(sk, SOCK_ZEROCOPY)) {
			...
		}

>+			/* If this option was set before 'connect()',
>+			 * when transport was unknown, check that this
>+			 * feature is supported here.
>+			 */
>+			if (sock_flag(sk, SOCK_ZEROCOPY)) {
>+				err = -EOPNOTSUPP;
>+				goto out;
>+			}
>+		} else {
> 			set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
>+		}
>
> 		err = vsock_auto_bind(vsk);
> 		if (err)
>@@ -1643,7 +1653,7 @@ static int vsock_connectible_setsockopt(struct socket *sock,
> 	const struct vsock_transport *transport;
> 	u64 val;
>
>-	if (level != AF_VSOCK)
>+	if (level != AF_VSOCK && level != SOL_SOCKET)
> 		return -ENOPROTOOPT;
>
> #define COPY_IN(_v)                                       \
>@@ -1666,6 +1676,34 @@ static int vsock_connectible_setsockopt(struct socket *sock,
>
> 	transport = vsk->transport;
>
>+	if (level == SOL_SOCKET) {

We could reduce the indentation here:
		if (optname != SO_ZEROCOPY) {
			release_sock(sk);
			return sock_setsockopt(sock, level, optname, optval, optlen);
		}

Then the code that follows can drop one level of indentation.

>+		if (optname == SO_ZEROCOPY) {
>+			int zc_val;

`zerocopy` is more readable.
>+
>+			/* Use 'int' type here, because variable to
>+			 * set this option usually has this type.
>+			 */
>+			COPY_IN(zc_val);
>+
>+			if (zc_val < 0 || zc_val > 1) {
>+				err = -EINVAL;
>+				goto exit;
>+			}
>+
>+			if (transport && !vsock_msgzerocopy_allow(transport)) {
>+				err = -EOPNOTSUPP;
>+				goto exit;
>+			}
>+
>+			sock_valbool_flag(sk, SOCK_ZEROCOPY,
>+					  zc_val ? true : false);

Why not use `zc_val` directly?
The 3rd parameter of sock_valbool_flag() is an int.

>+			goto exit;
>+		}
>+
>+		release_sock(sk);
>+		return sock_setsockopt(sock, level, optname, optval, optlen);
>+	}
>+
> 	switch (optname) {
> 	case SO_VM_SOCKETS_BUFFER_SIZE:
> 		COPY_IN(val);
>@@ -2321,6 +2359,8 @@ static int vsock_create(struct net *net, struct socket *sock,
> 		}
> 	}
>
>+	set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
>+
> 	vsock_insert_unbound(vsk);
>
> 	return 0;
>-- 
>2.25.1
>
  
Bobby Eshleman July 12, 2023, 10:31 p.m. UTC | #2
On Sat, Jul 01, 2023 at 09:39:43AM +0300, Arseniy Krasnov wrote:
> For AF_VSOCK, zerocopy tx mode depends on transport, so this option must
> be set in AF_VSOCK implementation where transport is accessible (if
> transport is not set during setting SO_ZEROCOPY: for example socket is
> not connected, then SO_ZEROCOPY will be enabled, but once transport will
> be assigned, support of this type of transmission will be checked).
> 
> To handle SO_ZEROCOPY, AF_VSOCK implementation uses SOCK_CUSTOM_SOCKOPT
> bit, thus handling SOL_SOCKET option operations, but all of them except
> SO_ZEROCOPY will be forwarded to the generic handler by calling
> 'sock_setsockopt()'.
> 
> Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
> ---
>  Changelog:
>  v4 -> v5:
>   * This patch is totally reworked. Previous version added check for
>     PF_VSOCK directly to 'net/core/sock.c', thus allowing to set
>     SO_ZEROCOPY for AF_VSOCK type of socket. This new version catches
>     attempt to set SO_ZEROCOPY in 'af_vsock.c'. All other options
>     except SO_ZEROCOPY are forwarded to generic handler. Only this
>     option is processed in 'af_vsock.c'. Handling this option includes
>     access to transport to check that MSG_ZEROCOPY transmission is
>     supported by the current transport (if it is set, if not - transport
>     will be checked during 'connect()').
> 
>  net/vmw_vsock/af_vsock.c | 44 ++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 42 insertions(+), 2 deletions(-)
> 
> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
> index da22ae0ef477..8acc77981d01 100644
> --- a/net/vmw_vsock/af_vsock.c
> +++ b/net/vmw_vsock/af_vsock.c
> @@ -1406,8 +1406,18 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr,
>  			goto out;
>  		}
>  
> -		if (vsock_msgzerocopy_allow(transport))
> +		if (!vsock_msgzerocopy_allow(transport)) {
> +			/* If this option was set before 'connect()',
> +			 * when transport was unknown, check that this
> +			 * feature is supported here.
> +			 */
> +			if (sock_flag(sk, SOCK_ZEROCOPY)) {
> +				err = -EOPNOTSUPP;
> +				goto out;
> +			}
> +		} else {
>  			set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
> +		}
>  
>  		err = vsock_auto_bind(vsk);
>  		if (err)
> @@ -1643,7 +1653,7 @@ static int vsock_connectible_setsockopt(struct socket *sock,
>  	const struct vsock_transport *transport;
>  	u64 val;
>  
> -	if (level != AF_VSOCK)
> +	if (level != AF_VSOCK && level != SOL_SOCKET)
>  		return -ENOPROTOOPT;
>  
>  #define COPY_IN(_v)                                       \
> @@ -1666,6 +1676,34 @@ static int vsock_connectible_setsockopt(struct socket *sock,
>  
>  	transport = vsk->transport;
>  
> +	if (level == SOL_SOCKET) {
> +		if (optname == SO_ZEROCOPY) {
> +			int zc_val;
> +
> +			/* Use 'int' type here, because variable to
> +			 * set this option usually has this type.
> +			 */
> +			COPY_IN(zc_val);
> +
> +			if (zc_val < 0 || zc_val > 1) {
> +				err = -EINVAL;
> +				goto exit;
> +			}
> +
> +			if (transport && !vsock_msgzerocopy_allow(transport)) {
> +				err = -EOPNOTSUPP;
> +				goto exit;
> +			}
> +
> +			sock_valbool_flag(sk, SOCK_ZEROCOPY,
> +					  zc_val ? true : false);
> +			goto exit;
> +		}
> +
> +		release_sock(sk);
> +		return sock_setsockopt(sock, level, optname, optval, optlen);
> +	}
> +
>  	switch (optname) {
>  	case SO_VM_SOCKETS_BUFFER_SIZE:
>  		COPY_IN(val);
> @@ -2321,6 +2359,8 @@ static int vsock_create(struct net *net, struct socket *sock,
>  		}
>  	}
>  
> +	set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
> +

I found that, because datagram sockets have no ops->setsockopt, setting
this bit causes setsockopt() to fail for them (the related logic can be
found in __sys_setsockopt). Maybe we should only set this bit for
connectible sockets?

Best,
Bobby

>  	vsock_insert_unbound(vsk);
>  
>  	return 0;
> -- 
> 2.25.1
>
  
Arseniy Krasnov July 13, 2023, 4:37 a.m. UTC | #3
On 13.07.2023 01:31, Bobby Eshleman wrote:
> On Sat, Jul 01, 2023 at 09:39:43AM +0300, Arseniy Krasnov wrote:
>> For AF_VSOCK, zerocopy tx mode depends on transport, so this option must
>> be set in AF_VSOCK implementation where transport is accessible (if
>> transport is not set during setting SO_ZEROCOPY: for example socket is
>> not connected, then SO_ZEROCOPY will be enabled, but once transport will
>> be assigned, support of this type of transmission will be checked).
>>
>> To handle SO_ZEROCOPY, AF_VSOCK implementation uses SOCK_CUSTOM_SOCKOPT
>> bit, thus handling SOL_SOCKET option operations, but all of them except
>> SO_ZEROCOPY will be forwarded to the generic handler by calling
>> 'sock_setsockopt()'.
>>
>> Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
>> ---
>>  Changelog:
>>  v4 -> v5:
>>   * This patch is totally reworked. Previous version added check for
>>     PF_VSOCK directly to 'net/core/sock.c', thus allowing to set
>>     SO_ZEROCOPY for AF_VSOCK type of socket. This new version catches
>>     attempt to set SO_ZEROCOPY in 'af_vsock.c'. All other options
>>     except SO_ZEROCOPY are forwarded to generic handler. Only this
>>     option is processed in 'af_vsock.c'. Handling this option includes
>>     access to transport to check that MSG_ZEROCOPY transmission is
>>     supported by the current transport (if it is set, if not - transport
>>     will be checked during 'connect()').
>>
>>  net/vmw_vsock/af_vsock.c | 44 ++++++++++++++++++++++++++++++++++++++--
>>  1 file changed, 42 insertions(+), 2 deletions(-)
>>
>> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>> index da22ae0ef477..8acc77981d01 100644
>> --- a/net/vmw_vsock/af_vsock.c
>> +++ b/net/vmw_vsock/af_vsock.c
>> @@ -1406,8 +1406,18 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr,
>>  			goto out;
>>  		}
>>  
>> -		if (vsock_msgzerocopy_allow(transport))
>> +		if (!vsock_msgzerocopy_allow(transport)) {
>> +			/* If this option was set before 'connect()',
>> +			 * when transport was unknown, check that this
>> +			 * feature is supported here.
>> +			 */
>> +			if (sock_flag(sk, SOCK_ZEROCOPY)) {
>> +				err = -EOPNOTSUPP;
>> +				goto out;
>> +			}
>> +		} else {
>>  			set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
>> +		}
>>  
>>  		err = vsock_auto_bind(vsk);
>>  		if (err)
>> @@ -1643,7 +1653,7 @@ static int vsock_connectible_setsockopt(struct socket *sock,
>>  	const struct vsock_transport *transport;
>>  	u64 val;
>>  
>> -	if (level != AF_VSOCK)
>> +	if (level != AF_VSOCK && level != SOL_SOCKET)
>>  		return -ENOPROTOOPT;
>>  
>>  #define COPY_IN(_v)                                       \
>> @@ -1666,6 +1676,34 @@ static int vsock_connectible_setsockopt(struct socket *sock,
>>  
>>  	transport = vsk->transport;
>>  
>> +	if (level == SOL_SOCKET) {
>> +		if (optname == SO_ZEROCOPY) {
>> +			int zc_val;
>> +
>> +			/* Use 'int' type here, because variable to
>> +			 * set this option usually has this type.
>> +			 */
>> +			COPY_IN(zc_val);
>> +
>> +			if (zc_val < 0 || zc_val > 1) {
>> +				err = -EINVAL;
>> +				goto exit;
>> +			}
>> +
>> +			if (transport && !vsock_msgzerocopy_allow(transport)) {
>> +				err = -EOPNOTSUPP;
>> +				goto exit;
>> +			}
>> +
>> +			sock_valbool_flag(sk, SOCK_ZEROCOPY,
>> +					  zc_val ? true : false);
>> +			goto exit;
>> +		}
>> +
>> +		release_sock(sk);
>> +		return sock_setsockopt(sock, level, optname, optval, optlen);
>> +	}
>> +
>>  	switch (optname) {
>>  	case SO_VM_SOCKETS_BUFFER_SIZE:
>>  		COPY_IN(val);
>> @@ -2321,6 +2359,8 @@ static int vsock_create(struct net *net, struct socket *sock,
>>  		}
>>  	}
>>  
>> +	set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
>> +
> 
> I found that because datagrams have !ops->setsockopt this bit causes
> setsockopt() to fail (the related logic can be found in
> __sys_setsockopt). Maybe we should only set this for connectibles?

Agreed! I'll add this check in the next version.

Thanks, Arseniy

> 
> Best,
> Bobby
> 
>>  	vsock_insert_unbound(vsk);
>>  
>>  	return 0;
>> -- 
>> 2.25.1
>>
  

Patch

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index da22ae0ef477..8acc77981d01 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1406,8 +1406,18 @@  static int vsock_connect(struct socket *sock, struct sockaddr *addr,
 			goto out;
 		}
 
-		if (vsock_msgzerocopy_allow(transport))
+		if (!vsock_msgzerocopy_allow(transport)) {
+			/* If this option was set before 'connect()',
+			 * when transport was unknown, check that this
+			 * feature is supported here.
+			 */
+			if (sock_flag(sk, SOCK_ZEROCOPY)) {
+				err = -EOPNOTSUPP;
+				goto out;
+			}
+		} else {
 			set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
+		}
 
 		err = vsock_auto_bind(vsk);
 		if (err)
@@ -1643,7 +1653,7 @@  static int vsock_connectible_setsockopt(struct socket *sock,
 	const struct vsock_transport *transport;
 	u64 val;
 
-	if (level != AF_VSOCK)
+	if (level != AF_VSOCK && level != SOL_SOCKET)
 		return -ENOPROTOOPT;
 
 #define COPY_IN(_v)                                       \
@@ -1666,6 +1676,34 @@  static int vsock_connectible_setsockopt(struct socket *sock,
 
 	transport = vsk->transport;
 
+	if (level == SOL_SOCKET) {
+		if (optname == SO_ZEROCOPY) {
+			int zc_val;
+
+			/* Use 'int' type here, because variable to
+			 * set this option usually has this type.
+			 */
+			COPY_IN(zc_val);
+
+			if (zc_val < 0 || zc_val > 1) {
+				err = -EINVAL;
+				goto exit;
+			}
+
+			if (transport && !vsock_msgzerocopy_allow(transport)) {
+				err = -EOPNOTSUPP;
+				goto exit;
+			}
+
+			sock_valbool_flag(sk, SOCK_ZEROCOPY,
+					  zc_val ? true : false);
+			goto exit;
+		}
+
+		release_sock(sk);
+		return sock_setsockopt(sock, level, optname, optval, optlen);
+	}
+
 	switch (optname) {
 	case SO_VM_SOCKETS_BUFFER_SIZE:
 		COPY_IN(val);
@@ -2321,6 +2359,8 @@  static int vsock_create(struct net *net, struct socket *sock,
 		}
 	}
 
+	set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
+
 	vsock_insert_unbound(vsk);
 
 	return 0;