[net,2/2] net/smc: Use correct buffer sizes when switching between TCP and SMC
Commit Message
Tuning of the effective buffer size through setsockopts was working for
SMC traffic only but not for TCP fall-back connections even before
commit 0227f058aa29 ("net/smc: Unbind r/w buffer size from clcsock and
make them tunable"). That change made it apparent that TCP fall-back
connections would use net.smc.[rw]mem as buffer size instead of
net.ipv4_tcp_[rw]mem.
Amend the code that copies attributes between the (TCP) clcsock and the
SMC socket and adjust buffer sizes appropriately:
- Copy over sk_userlocks so that both sockets agree on whether tuning
via setsockopt is active.
- When falling back to TCP use sk_sndbuf or sk_rcvbuf as specified with
setsockopt. Otherwise, use the sysctl value for TCP/IPv4.
- Likewise, use either values from setsockopt or from sysctl for SMC
(duplicated) on successful SMC connect.
In smc_tcp_listen_work() drop the explicit copy of buffer sizes as that
is taken care of by the attribute copy.
Fixes: 0227f058aa29 ("net/smc: Unbind r/w buffer size from clcsock and make them tunable")
Signed-off-by: Gerd Bayer <gbayer@linux.ibm.com>
Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
Reviewed-by: Jan Karcher <jaka@linux.ibm.com>
---
net/smc/af_smc.c | 76 ++++++++++++++++++++++++++++++++++--------------
1 file changed, 54 insertions(+), 22 deletions(-)
Comments
On Wed, Aug 02, 2023 at 11:33:13AM +0200, Gerd Bayer wrote:
> Tuning of the effective buffer size through setsockopts was working for
> SMC traffic only but not for TCP fall-back connections even before
> commit 0227f058aa29 ("net/smc: Unbind r/w buffer size from clcsock and
> make them tunable"). That change made it apparent that TCP fall-back
> connections would use net.smc.[rw]mem as buffer size instead of
> net.ipv4_tcp_[rw]mem.
>
> Amend the code that copies attributes between the (TCP) clcsock and the
> SMC socket and adjust buffer sizes appropriately:
> - Copy over sk_userlocks so that both sockets agree on whether tuning
> via setsockopt is active.
> - When falling back to TCP use sk_sndbuf or sk_rcvbuf as specified with
> setsockopt. Otherwise, use the sysctl value for TCP/IPv4.
> - Likewise, use either values from setsockopt or from sysctl for SMC
> (duplicated) on successful SMC connect.
>
> In smc_tcp_listen_work() drop the explicit copy of buffer sizes as that
> is taken care of by the attribute copy.
>
> Fixes: 0227f058aa29 ("net/smc: Unbind r/w buffer size from clcsock and make them tunable")
> Signed-off-by: Gerd Bayer <gbayer@linux.ibm.com>
> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
> Reviewed-by: Jan Karcher <jaka@linux.ibm.com>
Reviewed-by: Tony Lu <tonylu@linux.alibaba.com>
>
^^^^ nit: a extra new line here.
> ---
> net/smc/af_smc.c | 76 ++++++++++++++++++++++++++++++++++--------------
> 1 file changed, 54 insertions(+), 22 deletions(-)
>
> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
> index 1fcf1e42474a..1c8ed19041d7 100644
> --- a/net/smc/af_smc.c
> +++ b/net/smc/af_smc.c
> @@ -436,13 +436,63 @@ static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
> return rc;
> }
>
> +/* copy only relevant settings and flags of SOL_SOCKET level from smc to
> + * clc socket (since smc is not called for these options from net/core)
> + */
> +
> +#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
> + (1UL << SOCK_KEEPOPEN) | \
> + (1UL << SOCK_LINGER) | \
> + (1UL << SOCK_BROADCAST) | \
> + (1UL << SOCK_TIMESTAMP) | \
> + (1UL << SOCK_DBG) | \
> + (1UL << SOCK_RCVTSTAMP) | \
> + (1UL << SOCK_RCVTSTAMPNS) | \
> + (1UL << SOCK_LOCALROUTE) | \
> + (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
> + (1UL << SOCK_RXQ_OVFL) | \
> + (1UL << SOCK_WIFI_STATUS) | \
> + (1UL << SOCK_NOFCS) | \
> + (1UL << SOCK_FILTER_LOCKED) | \
> + (1UL << SOCK_TSTAMP_NEW))
> +
> +/* if set, use value set by setsockopt() - else use IPv4 or SMC sysctl value */
> +static void smc_adjust_sock_bufsizes(struct sock *nsk, struct sock *osk,
> + unsigned long mask)
> +{
> + struct net *nnet;
> +
> + nnet = nsk->sk_net.net;
Better to combine these two lines with existed helper.
struct net *net = sock_net(nsk);
> +
> + nsk->sk_userlocks = osk->sk_userlocks;
> +
> + if (osk->sk_userlocks & SOCK_SNDBUF_LOCK) {
> + nsk->sk_sndbuf = osk->sk_sndbuf;
> + } else {
> + if (mask == SK_FLAGS_SMC_TO_CLC)
> + WRITE_ONCE(nsk->sk_sndbuf,
> + READ_ONCE(nnet->ipv4.sysctl_tcp_wmem[1]));
> + else
> + WRITE_ONCE(nsk->sk_sndbuf,
> + 2 * READ_ONCE(nnet->smc.sysctl_wmem));
> + }
> + if (osk->sk_userlocks & SOCK_RCVBUF_LOCK) {
> + nsk->sk_rcvbuf = osk->sk_rcvbuf;
> + } else {
> + if (mask == SK_FLAGS_SMC_TO_CLC)
> + WRITE_ONCE(nsk->sk_rcvbuf,
> + READ_ONCE(nnet->ipv4.sysctl_tcp_rmem[1]));
> + else
> + WRITE_ONCE(nsk->sk_rcvbuf,
> + 2 * READ_ONCE(nnet->smc.sysctl_rmem));
> + }
> +}
> +
> static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
> unsigned long mask)
> {
> /* options we don't get control via setsockopt for */
> nsk->sk_type = osk->sk_type;
> - nsk->sk_sndbuf = osk->sk_sndbuf;
> - nsk->sk_rcvbuf = osk->sk_rcvbuf;
> nsk->sk_sndtimeo = osk->sk_sndtimeo;
> nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
> nsk->sk_mark = osk->sk_mark;
> @@ -453,26 +503,10 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
>
> nsk->sk_flags &= ~mask;
> nsk->sk_flags |= osk->sk_flags & mask;
> +
> + smc_adjust_sock_bufsizes(nsk, osk, mask);
> }
>
> -#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
> - (1UL << SOCK_KEEPOPEN) | \
> - (1UL << SOCK_LINGER) | \
> - (1UL << SOCK_BROADCAST) | \
> - (1UL << SOCK_TIMESTAMP) | \
> - (1UL << SOCK_DBG) | \
> - (1UL << SOCK_RCVTSTAMP) | \
> - (1UL << SOCK_RCVTSTAMPNS) | \
> - (1UL << SOCK_LOCALROUTE) | \
> - (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
> - (1UL << SOCK_RXQ_OVFL) | \
> - (1UL << SOCK_WIFI_STATUS) | \
> - (1UL << SOCK_NOFCS) | \
> - (1UL << SOCK_FILTER_LOCKED) | \
> - (1UL << SOCK_TSTAMP_NEW))
> -/* copy only relevant settings and flags of SOL_SOCKET level from smc to
> - * clc socket (since smc is not called for these options from net/core)
> - */
> static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
> {
> smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
> @@ -2479,8 +2513,6 @@ static void smc_tcp_listen_work(struct work_struct *work)
> sock_hold(lsk); /* sock_put in smc_listen_work */
> INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
> smc_copy_sock_settings_to_smc(new_smc);
> - new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
> - new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
> sock_hold(&new_smc->sk); /* sock_put in passive closing */
> if (!queue_work(smc_hs_wq, &new_smc->smc_listen_work))
> sock_put(&new_smc->sk);
> --
> 2.41.0
On Wed, 2023-08-02 at 20:43 +0800, Tony Lu wrote:
> On Wed, Aug 02, 2023 at 11:33:13AM +0200, Gerd Bayer wrote:
> > Tuning of the effective buffer size through setsockopts was working
> > for
> > SMC traffic only but not for TCP fall-back connections even before
> > commit 0227f058aa29 ("net/smc: Unbind r/w buffer size from clcsock
> > and
> > make them tunable"). That change made it apparent that TCP fall-
> > back
> > connections would use net.smc.[rw]mem as buffer size instead of
> > net.ipv4_tcp_[rw]mem.
> >
> > Amend the code that copies attributes between the (TCP) clcsock and
> > the
> > SMC socket and adjust buffer sizes appropriately:
> > - Copy over sk_userlocks so that both sockets agree on whether
> > tuning
> > via setsockopt is active.
> > - When falling back to TCP use sk_sndbuf or sk_rcvbuf as specified
> > with
> > setsockopt. Otherwise, use the sysctl value for TCP/IPv4.
> > - Likewise, use either values from setsockopt or from sysctl for
> > SMC
> > (duplicated) on successful SMC connect.
> >
> > In smc_tcp_listen_work() drop the explicit copy of buffer sizes as
> > that
> > is taken care of by the attribute copy.
> >
> > Fixes: 0227f058aa29 ("net/smc: Unbind r/w buffer size from clcsock
> > and make them tunable")
> > Signed-off-by: Gerd Bayer <gbayer@linux.ibm.com>
> > Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
> > Reviewed-by: Jan Karcher <jaka@linux.ibm.com>
>
> Reviewed-by: Tony Lu <tonylu@linux.alibaba.com>
>
> >
> ^^^^ nit: a extra new line here.
I'll clean that up.
> > ---
> > net/smc/af_smc.c | 76 ++++++++++++++++++++++++++++++++++----------
> > ----
> > 1 file changed, 54 insertions(+), 22 deletions(-)
> >
> >
[...]
> > +/* if set, use value set by setsockopt() - else use IPv4 or SMC
> > sysctl value */
> > +static void smc_adjust_sock_bufsizes(struct sock *nsk, struct sock
> > *osk,
> > + unsigned long mask)
> > +{
> > + struct net *nnet;
> > +
> > + nnet = nsk->sk_net.net;
>
> Better to combine these two lines with existed helper.
>
> struct net *net = sock_net(nsk);
Yes, looks much cleaner.
[...]
>
Thank you Tony for your review and comments.
I'll be sending out a v2 with your recommendations - but give people a
little more time to look at this version.
Thanks,
Gerd
Hi Gerd,
kernel test robot noticed the following build errors:
[auto build test ERROR on net-next/main]
[also build test ERROR on linus/master v6.5-rc4 next-20230802]
[cannot apply to net/main]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Gerd-Bayer/net-smc-Fix-setsockopt-and-sysctl-to-specify-same-buffer-size-again/20230802-193805
base: net-next/main
patch link: https://lore.kernel.org/r/20230802093313.1501605-3-gbayer%40linux.ibm.com
patch subject: [PATCH net 2/2] net/smc: Use correct buffer sizes when switching between TCP and SMC
config: nios2-randconfig-r006-20230731 (https://download.01.org/0day-ci/archive/20230803/202308030722.dV3X9uUQ-lkp@intel.com/config)
compiler: nios2-linux-gcc (GCC) 12.3.0
reproduce: (https://download.01.org/0day-ci/archive/20230803/202308030722.dV3X9uUQ-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202308030722.dV3X9uUQ-lkp@intel.com/
All errors (new ones prefixed by >>):
net/smc/af_smc.c: In function 'smc_adjust_sock_bufsizes':
>> net/smc/af_smc.c:465:27: error: 'possible_net_t' has no member named 'net'
465 | nnet = nsk->sk_net.net;
| ^
vim +465 net/smc/af_smc.c
438
439 /* copy only relevant settings and flags of SOL_SOCKET level from smc to
440 * clc socket (since smc is not called for these options from net/core)
441 */
442
443 #define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
444 (1UL << SOCK_KEEPOPEN) | \
445 (1UL << SOCK_LINGER) | \
446 (1UL << SOCK_BROADCAST) | \
447 (1UL << SOCK_TIMESTAMP) | \
448 (1UL << SOCK_DBG) | \
449 (1UL << SOCK_RCVTSTAMP) | \
450 (1UL << SOCK_RCVTSTAMPNS) | \
451 (1UL << SOCK_LOCALROUTE) | \
452 (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
453 (1UL << SOCK_RXQ_OVFL) | \
454 (1UL << SOCK_WIFI_STATUS) | \
455 (1UL << SOCK_NOFCS) | \
456 (1UL << SOCK_FILTER_LOCKED) | \
457 (1UL << SOCK_TSTAMP_NEW))
458
459 /* if set, use value set by setsockopt() - else use IPv4 or SMC sysctl value */
460 static void smc_adjust_sock_bufsizes(struct sock *nsk, struct sock *osk,
461 unsigned long mask)
462 {
463 struct net *nnet;
464
> 465 nnet = nsk->sk_net.net;
466
467 nsk->sk_userlocks = osk->sk_userlocks;
468
469 if (osk->sk_userlocks & SOCK_SNDBUF_LOCK) {
470 nsk->sk_sndbuf = osk->sk_sndbuf;
471 } else {
472 if (mask == SK_FLAGS_SMC_TO_CLC)
473 WRITE_ONCE(nsk->sk_sndbuf,
474 READ_ONCE(nnet->ipv4.sysctl_tcp_wmem[1]));
475 else
476 WRITE_ONCE(nsk->sk_sndbuf,
477 2 * READ_ONCE(nnet->smc.sysctl_wmem));
478 }
479 if (osk->sk_userlocks & SOCK_RCVBUF_LOCK) {
480 nsk->sk_rcvbuf = osk->sk_rcvbuf;
481 } else {
482 if (mask == SK_FLAGS_SMC_TO_CLC)
483 WRITE_ONCE(nsk->sk_rcvbuf,
484 READ_ONCE(nnet->ipv4.sysctl_tcp_rmem[1]));
485 else
486 WRITE_ONCE(nsk->sk_rcvbuf,
487 2 * READ_ONCE(nnet->smc.sysctl_rmem));
488 }
489 }
490
@@ -436,13 +436,63 @@ static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
return rc;
}
+/* copy only relevant settings and flags of SOL_SOCKET level from smc to
+ * clc socket (since smc is not called for these options from net/core)
+ */
+
+#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
+ (1UL << SOCK_KEEPOPEN) | \
+ (1UL << SOCK_LINGER) | \
+ (1UL << SOCK_BROADCAST) | \
+ (1UL << SOCK_TIMESTAMP) | \
+ (1UL << SOCK_DBG) | \
+ (1UL << SOCK_RCVTSTAMP) | \
+ (1UL << SOCK_RCVTSTAMPNS) | \
+ (1UL << SOCK_LOCALROUTE) | \
+ (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
+ (1UL << SOCK_RXQ_OVFL) | \
+ (1UL << SOCK_WIFI_STATUS) | \
+ (1UL << SOCK_NOFCS) | \
+ (1UL << SOCK_FILTER_LOCKED) | \
+ (1UL << SOCK_TSTAMP_NEW))
+
+/* if set, use value set by setsockopt() - else use IPv4 or SMC sysctl value */
+static void smc_adjust_sock_bufsizes(struct sock *nsk, struct sock *osk,
+ unsigned long mask)
+{
+ struct net *nnet;
+
+ nnet = nsk->sk_net.net;
+
+ nsk->sk_userlocks = osk->sk_userlocks;
+
+ if (osk->sk_userlocks & SOCK_SNDBUF_LOCK) {
+ nsk->sk_sndbuf = osk->sk_sndbuf;
+ } else {
+ if (mask == SK_FLAGS_SMC_TO_CLC)
+ WRITE_ONCE(nsk->sk_sndbuf,
+ READ_ONCE(nnet->ipv4.sysctl_tcp_wmem[1]));
+ else
+ WRITE_ONCE(nsk->sk_sndbuf,
+ 2 * READ_ONCE(nnet->smc.sysctl_wmem));
+ }
+ if (osk->sk_userlocks & SOCK_RCVBUF_LOCK) {
+ nsk->sk_rcvbuf = osk->sk_rcvbuf;
+ } else {
+ if (mask == SK_FLAGS_SMC_TO_CLC)
+ WRITE_ONCE(nsk->sk_rcvbuf,
+ READ_ONCE(nnet->ipv4.sysctl_tcp_rmem[1]));
+ else
+ WRITE_ONCE(nsk->sk_rcvbuf,
+ 2 * READ_ONCE(nnet->smc.sysctl_rmem));
+ }
+}
+
static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
unsigned long mask)
{
/* options we don't get control via setsockopt for */
nsk->sk_type = osk->sk_type;
- nsk->sk_sndbuf = osk->sk_sndbuf;
- nsk->sk_rcvbuf = osk->sk_rcvbuf;
nsk->sk_sndtimeo = osk->sk_sndtimeo;
nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
nsk->sk_mark = osk->sk_mark;
@@ -453,26 +503,10 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
nsk->sk_flags &= ~mask;
nsk->sk_flags |= osk->sk_flags & mask;
+
+ smc_adjust_sock_bufsizes(nsk, osk, mask);
}
-#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
- (1UL << SOCK_KEEPOPEN) | \
- (1UL << SOCK_LINGER) | \
- (1UL << SOCK_BROADCAST) | \
- (1UL << SOCK_TIMESTAMP) | \
- (1UL << SOCK_DBG) | \
- (1UL << SOCK_RCVTSTAMP) | \
- (1UL << SOCK_RCVTSTAMPNS) | \
- (1UL << SOCK_LOCALROUTE) | \
- (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
- (1UL << SOCK_RXQ_OVFL) | \
- (1UL << SOCK_WIFI_STATUS) | \
- (1UL << SOCK_NOFCS) | \
- (1UL << SOCK_FILTER_LOCKED) | \
- (1UL << SOCK_TSTAMP_NEW))
-/* copy only relevant settings and flags of SOL_SOCKET level from smc to
- * clc socket (since smc is not called for these options from net/core)
- */
static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
@@ -2479,8 +2513,6 @@ static void smc_tcp_listen_work(struct work_struct *work)
sock_hold(lsk); /* sock_put in smc_listen_work */
INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
smc_copy_sock_settings_to_smc(new_smc);
- new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
- new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
sock_hold(&new_smc->sk); /* sock_put in passive closing */
if (!queue_work(smc_hs_wq, &new_smc->smc_listen_work))
sock_put(&new_smc->sk);