hv_netvsc:Register VF in netvsc_probe if NET_DEVICE_REGISTER missed

Message ID 1706599135-12651-1-git-send-email-shradhagupta@linux.microsoft.com
State New
Headers
Series hv_netvsc:Register VF in netvsc_probe if NET_DEVICE_REGISTER missed |

Commit Message

Shradha Gupta Jan. 30, 2024, 7:18 a.m. UTC
  If hv_netvsc driver is removed and reloaded, the NET_DEVICE_REGISTER
handler cannot perform VF register successfully as the register call
is received before netvsc_probe is finished. This is because we
register register_netdevice_notifier() very early(even before
vmbus_driver_register()).
To fix this, we try to register each such matching VF( if it is visible
as a netdevice) at the end of netvsc_probe.

Cc: stable@vger.kernel.org
Fixes: 85520856466e ("hv_netvsc: Fix race of register_netdevice_notifier and VF register")
Suggested-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: Shradha Gupta <shradhagupta@linux.microsoft.com>
Tested-on: Ubuntu22
Testcases: LISA testsuites
	   verify_reload_hyperv_modules, perf_tcp_ntttcp_sriov
---
 drivers/net/hyperv/netvsc_drv.c | 49 ++++++++++++++++++++++++++++-----
 1 file changed, 42 insertions(+), 7 deletions(-)
  

Comments

Haiyang Zhang Jan. 30, 2024, 1:43 p.m. UTC | #1
> -----Original Message-----
> From: Shradha Gupta <shradhagupta@linux.microsoft.com>
> Sent: Tuesday, January 30, 2024 2:19 AM
> To: KY Srinivasan <kys@microsoft.com>; Haiyang Zhang
> <haiyangz@microsoft.com>; Wei Liu <wei.liu@kernel.org>; Dexuan Cui
> <decui@microsoft.com>; David S. Miller <davem@davemloft.net>; Eric
> Dumazet <edumazet@google.com>; Jakub Kicinski <kuba@kernel.org>; Paolo
> Abeni <pabeni@redhat.com>; Wojciech Drewek <wojciech.drewek@intel.com>;
> linux-hyperv@vger.kernel.org; netdev@vger.kernel.org; linux-
> kernel@vger.kernel.org
> Cc: Shradha Gupta <shradhagupta@linux.microsoft.com>; Shradha Gupta
> <shradhagupta@microsoft.com>; stable@vger.kernel.org
> Subject: [PATCH] hv_netvsc:Register VF in netvsc_probe if
> NET_DEVICE_REGISTER missed
> 
> If hv_netvsc driver is removed and reloaded, the NET_DEVICE_REGISTER
> handler cannot perform VF register successfully as the register call
> is received before netvsc_probe is finished. This is because we
> register register_netdevice_notifier() very early(even before
> vmbus_driver_register()).
> To fix this, we try to register each such matching VF( if it is visible
> as a netdevice) at the end of netvsc_probe.
> 
> Cc: stable@vger.kernel.org
> Fixes: 85520856466e ("hv_netvsc: Fix race of register_netdevice_notifier
> and VF register")
> Suggested-by: Dexuan Cui <decui@microsoft.com>
> Signed-off-by: Shradha Gupta <shradhagupta@linux.microsoft.com>
> Tested-on: Ubuntu22
> Testcases: LISA testsuites
> 	   verify_reload_hyperv_modules, perf_tcp_ntttcp_sriov
> ---
>  drivers/net/hyperv/netvsc_drv.c | 49 ++++++++++++++++++++++++++++-----
>  1 file changed, 42 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/hyperv/netvsc_drv.c
> b/drivers/net/hyperv/netvsc_drv.c
> index 706ea5263e87..25c4dc9cc4bd 100644
> --- a/drivers/net/hyperv/netvsc_drv.c
> +++ b/drivers/net/hyperv/netvsc_drv.c
> @@ -42,6 +42,10 @@
>  #define LINKCHANGE_INT (2 * HZ)
>  #define VF_TAKEOVER_INT (HZ / 10)
> 
> +/* Macros to define the context of vf registration */
> +#define VF_REG_IN_PROBE		1
> +#define VF_REG_IN_RECV_CBACK	2
> +
>  static unsigned int ring_size __ro_after_init = 128;
>  module_param(ring_size, uint, 0444);
>  MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
> @@ -2183,7 +2187,7 @@ static rx_handler_result_t
> netvsc_vf_handle_frame(struct sk_buff **pskb)
>  }
> 
>  static int netvsc_vf_join(struct net_device *vf_netdev,
> -			  struct net_device *ndev)
> +			  struct net_device *ndev, int context)
>  {
>  	struct net_device_context *ndev_ctx = netdev_priv(ndev);
>  	int ret;
> @@ -2205,8 +2209,11 @@ static int netvsc_vf_join(struct net_device
> *vf_netdev,
>  			   ndev->name, ret);
>  		goto upper_link_failed;
>  	}
> -
> -	schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);
> +	/* If this registration is called from probe context vf_takeover
> +	 * is taken care of later in probe itself.
> +	 */
> +	if (context == VF_REG_IN_RECV_CBACK)
> +		schedule_delayed_work(&ndev_ctx->vf_takeover,
> VF_TAKEOVER_INT);
> 
>  	call_netdevice_notifiers(NETDEV_JOIN, vf_netdev);
> 
> @@ -2344,7 +2351,7 @@ static int netvsc_prepare_bonding(struct net_device
> *vf_netdev)
>  	return NOTIFY_DONE;
>  }
> 
> -static int netvsc_register_vf(struct net_device *vf_netdev)
> +static int netvsc_register_vf(struct net_device *vf_netdev, int context)
>  {
>  	struct net_device_context *net_device_ctx;
>  	struct netvsc_device *netvsc_dev;
> @@ -2384,7 +2391,7 @@ static int netvsc_register_vf(struct net_device
> *vf_netdev)
> 
>  	netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
> 
> -	if (netvsc_vf_join(vf_netdev, ndev) != 0)
> +	if (netvsc_vf_join(vf_netdev, ndev, context) != 0)
>  		return NOTIFY_DONE;
> 
>  	dev_hold(vf_netdev);
> @@ -2485,7 +2492,7 @@ static int netvsc_unregister_vf(struct net_device
> *vf_netdev)
>  static int netvsc_probe(struct hv_device *dev,
>  			const struct hv_vmbus_device_id *dev_id)
>  {
> -	struct net_device *net = NULL;
> +	struct net_device *net = NULL, *vf_netdev;
>  	struct net_device_context *net_device_ctx;
>  	struct netvsc_device_info *device_info = NULL;
>  	struct netvsc_device *nvdev;
> @@ -2597,6 +2604,34 @@ static int netvsc_probe(struct hv_device *dev,
>  	}
> 
>  	list_add(&net_device_ctx->list, &netvsc_dev_list);
> +
> +	/* When the hv_netvsc driver is removed and readded, the
> +	 * NET_DEVICE_REGISTER for the vf device is replayed before probe
> +	 * is complete. This is because register_netdevice_notifier() gets
> +	 * registered before vmbus_driver_register() so that callback func
> +	 * is set before probe and we don't miss events like
> NETDEV_POST_INIT
> +	 * So, in this section we try to register each matching
> +	 * vf device that is present as a netdevice, knowing that it's
> register
> +	 * call is not processed in the netvsc_netdev_notifier(as probing
> is
> +	 * progress and get_netvsc_byslot fails).
> +	 */
> +	for_each_netdev(dev_net(net), vf_netdev) {
> +		if (vf_netdev->netdev_ops == &device_ops)
> +			continue;
> +
> +		if (vf_netdev->type != ARPHRD_ETHER)
> +			continue;
> +
> +		if (is_vlan_dev(vf_netdev))
> +			continue;
> +
> +		if (netif_is_bond_master(vf_netdev))
> +			continue;
> +
> +		netvsc_prepare_bonding(vf_netdev);
> +		netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE);
> +		__netvsc_vf_setup(net, vf_netdev);
> +	}
>  	rtnl_unlock();
> 
>  	netvsc_devinfo_put(device_info);
> @@ -2773,7 +2808,7 @@ static int netvsc_netdev_event(struct
> notifier_block *this,
>  	case NETDEV_POST_INIT:
>  		return netvsc_prepare_bonding(event_dev);
>  	case NETDEV_REGISTER:
> -		return netvsc_register_vf(event_dev);
> +		return netvsc_register_vf(event_dev, VF_REG_IN_RECV_CBACK);
>  	case NETDEV_UNREGISTER:
>  		return netvsc_unregister_vf(event_dev);
>  	case NETDEV_UP:
> --
> 2.34.1

Please use [PATCH net] on the subject to specify the branch.

Everything else looks fine.

Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
  
Dexuan Cui Jan. 30, 2024, 8:13 p.m. UTC | #2
> From: Shradha Gupta <shradhagupta@linux.microsoft.com>
> Sent: Monday, January 29, 2024 11:19 PM
>  [...]
> If hv_netvsc driver is removed and reloaded, the NET_DEVICE_REGISTER

s/removed/unloaded/
unloaded looks more accurate to me :-)

> [...]
> Tested-on: Ubuntu22
> Testcases: LISA testsuites
> 	   verify_reload_hyperv_modules, perf_tcp_ntttcp_sriov
IMO the 3 lines can be removed: this bug is not specific to Ubuntu, and the
test case names don't provide extra value to help understand the issue
here and they might cause more questions unnecessarily, e.g. what's LISA,
what exactly do the test cases do.

> +/* Macros to define the context of vf registration */
> +#define VF_REG_IN_PROBE		1
> +#define VF_REG_IN_RECV_CBACK	2

I think VF_REG_IN_NOTIFIER is a better name?
RECV_CBALL looks inaccurate to me.

> @@ -2205,8 +2209,11 @@ static int netvsc_vf_join(struct net_device
> *vf_netdev,
>  			   ndev->name, ret);
>  		goto upper_link_failed;
>  	}
> -
> -	schedule_delayed_work(&ndev_ctx->vf_takeover,
> VF_TAKEOVER_INT);
> +	/* If this registration is called from probe context vf_takeover
> +	 * is taken care of later in probe itself.
I suspect "later in probe itself" is not accurate.
If 'context' is VF_REG_IN_PROBE, I suppose what happens here is:
after netvsc_probe() finishes, the netvsc interface becomes available,
so the user space will ifup it, and netvsc_open() will UP the VF interface,
and netvsc_netdev_event() is called for the VF with event ==
NETDEV_POST_INIT (?) and NETDEV_CHANGE, and the data path is
switched to the VF.

If my understanding is correct, I think in the case of 'context' ==
VF_REG_IN_PROBE, I suspect the "Align MTU of VF with master"
and the "sync address list from ndev to VF" in __netvsc_vf_setup() are
omitted? If so, should this be fixed? e.g. Not sure if the below is an issue or not:
1) a VF is bound to a NetVSC interface, and a user sets the MTUs to 1024.
2) rmmod hv_netvsc
3) modprobe hv_netvsc
4) the netvsc interface uses MTU=1500 (the default), and the VF still uses 1024.

> @@ -2597,6 +2604,34 @@ static int netvsc_probe(struct hv_device *dev,
>  	}
> 
>  	list_add(&net_device_ctx->list, &netvsc_dev_list);
> +
> +	/* When the hv_netvsc driver is removed and readded, the

s/removed and readded/unloaded and reloaded/

> +	 * NET_DEVICE_REGISTER for the vf device is replayed before
> probe
> +	 * is complete. This is because register_netdevice_notifier() gets
> +	 * registered before vmbus_driver_register() so that callback func
> +	 * is set before probe and we don't miss events like
> NETDEV_POST_INIT
> +	 * So, in this section we try to register each matching

Looks like there are spaces at the end of the line. We can move up a few words
from the next line :-)

s/each matching/the matching/
A NetVSC interface has only 1 matching VF device.

> +	 * vf device that is present as a netdevice, knowing that it's register

s/it's/its/

> +	 * call is not processed in the netvsc_netdev_notifier(as probing is
> +	 * progress and get_netvsc_byslot fails).
> +	 */
> +	for_each_netdev(dev_net(net), vf_netdev) {
> +		if (vf_netdev->netdev_ops == &device_ops)
> +			continue;
> +
> +		if (vf_netdev->type != ARPHRD_ETHER)
> +			continue;
> +
> +		if (is_vlan_dev(vf_netdev))
> +			continue;
> +
> +		if (netif_is_bond_master(vf_netdev))
> +			continue;

The code above is duplicated from netvsc_netdev_event().
Can we add a new helper function is_matching_vf() to avoid the duplication?

> +		netvsc_prepare_bonding(vf_netdev);
> +		netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE);
> +		__netvsc_vf_setup(net, vf_netdev);

add a "break;' ?

> +	}
>  	rtnl_unlock();
  
Haiyang Zhang Jan. 30, 2024, 10:05 p.m. UTC | #3
> -----Original Message-----
> From: Dexuan Cui <decui@microsoft.com>
> Sent: Tuesday, January 30, 2024 3:13 PM
> To: Shradha Gupta <shradhagupta@linux.microsoft.com>; KY Srinivasan


> 
> > @@ -2205,8 +2209,11 @@ static int netvsc_vf_join(struct net_device
> > *vf_netdev,
> >  			   ndev->name, ret);
> >  		goto upper_link_failed;
> >  	}
> > -
> > -	schedule_delayed_work(&ndev_ctx->vf_takeover,
> > VF_TAKEOVER_INT);
> > +	/* If this registration is called from probe context vf_takeover
> > +	 * is taken care of later in probe itself.
> I suspect "later in probe itself" is not accurate.
> If 'context' is VF_REG_IN_PROBE, I suppose what happens here is:
> after netvsc_probe() finishes, the netvsc interface becomes available,
> so the user space will ifup it, and netvsc_open() will UP the VF
> interface,
> and netvsc_netdev_event() is called for the VF with event ==
> NETDEV_POST_INIT (?) and NETDEV_CHANGE, and the data path is
> switched to the VF.

In register_netdevice(), NETDEV_POST_INIT is earlier than NETDEV_REGISTER.
This case: netvsc_open >> dev_open(vf) >> NETDEV_UP >>
 netvsc_vf_changed(event_dev, event);

> 
> If my understanding is correct, I think in the case of 'context' ==
> VF_REG_IN_PROBE, I suspect the "Align MTU of VF with master"
> and the "sync address list from ndev to VF" in __netvsc_vf_setup() are
> omitted? If so, should this be fixed? e.g. Not sure if the below is an
> issue or not:
> 1) a VF is bound to a NetVSC interface, and a user sets the MTUs to 1024.
> 2) rmmod hv_netvsc
> 3) modprobe hv_netvsc
> 4) the netvsc interface uses MTU=1500 (the default), and the VF still
> uses 1024.

__netvsc_vf_setup() is skipped from the netvsc_register_vf >> netvsc_vf_join(),
but called from netvsc_probe(), so the VF mtu is sync-ed to 1500.
I verified mtu sync in test.

Thanks,
- Haiyang
  
Jakub Kicinski Jan. 31, 2024, 2:29 a.m. UTC | #4
On Mon, 29 Jan 2024 23:18:55 -0800 Shradha Gupta wrote:
> If hv_netvsc driver is removed and reloaded, the NET_DEVICE_REGISTER
> handler cannot perform VF register successfully as the register call
> is received before netvsc_probe is finished. This is because we
> register register_netdevice_notifier() very early(even before
> vmbus_driver_register()).
> To fix this, we try to register each such matching VF( if it is visible
> as a netdevice) at the end of netvsc_probe.
> 
> Cc: stable@vger.kernel.org
> Fixes: 85520856466e ("hv_netvsc: Fix race of register_netdevice_notifier and VF register")
> Suggested-by: Dexuan Cui <decui@microsoft.com>
> Signed-off-by: Shradha Gupta <shradhagupta@linux.microsoft.com>

Does not seem to apply to net/main, please respin
  
Shradha Gupta Jan. 31, 2024, 6:09 a.m. UTC | #5
On Tue, Jan 30, 2024 at 06:29:14PM -0800, Jakub Kicinski wrote:
> On Mon, 29 Jan 2024 23:18:55 -0800 Shradha Gupta wrote:
> > If hv_netvsc driver is removed and reloaded, the NET_DEVICE_REGISTER
> > handler cannot perform VF register successfully as the register call
> > is received before netvsc_probe is finished. This is because we
> > register register_netdevice_notifier() very early(even before
> > vmbus_driver_register()).
> > To fix this, we try to register each such matching VF( if it is visible
> > as a netdevice) at the end of netvsc_probe.
> > 
> > Cc: stable@vger.kernel.org
> > Fixes: 85520856466e ("hv_netvsc: Fix race of register_netdevice_notifier and VF register")
> > Suggested-by: Dexuan Cui <decui@microsoft.com>
> > Signed-off-by: Shradha Gupta <shradhagupta@linux.microsoft.com>
> 
> Does not seem to apply to net/main, please respin
This patch applies to net, which is missed in the subject. I will fix this in the new version of the patch. Thanks
  
Shradha Gupta Jan. 31, 2024, 7:54 a.m. UTC | #6
On Tue, Jan 30, 2024 at 08:13:21PM +0000, Dexuan Cui wrote:
> > From: Shradha Gupta <shradhagupta@linux.microsoft.com>
> > Sent: Monday, January 29, 2024 11:19 PM
> >  [...]
> > If hv_netvsc driver is removed and reloaded, the NET_DEVICE_REGISTER
> 
> s/removed/unloaded/
> unloaded looks more accurate to me :-)
> 
> > [...]
> > Tested-on: Ubuntu22
> > Testcases: LISA testsuites
> > 	   verify_reload_hyperv_modules, perf_tcp_ntttcp_sriov
> IMO the 3 lines can be removed: this bug is not specific to Ubuntu, and the
> test case names don't provide extra value to help understand the issue
> here and they might cause more questions unnecessarily, e.g. what's LISA,
> what exactly do the test cases do.
> 
> > +/* Macros to define the context of vf registration */
> > +#define VF_REG_IN_PROBE		1
> > +#define VF_REG_IN_RECV_CBACK	2
> 
> I think VF_REG_IN_NOTIFIER is a better name?
> RECV_CBALL looks inaccurate to me.
> 
> > @@ -2205,8 +2209,11 @@ static int netvsc_vf_join(struct net_device
> > *vf_netdev,
> >  			   ndev->name, ret);
> >  		goto upper_link_failed;
> >  	}
> > -
> > -	schedule_delayed_work(&ndev_ctx->vf_takeover,
> > VF_TAKEOVER_INT);
> > +	/* If this registration is called from probe context vf_takeover
> > +	 * is taken care of later in probe itself.
> I suspect "later in probe itself" is not accurate.
> If 'context' is VF_REG_IN_PROBE, I suppose what happens here is:
> after netvsc_probe() finishes, the netvsc interface becomes available,
> so the user space will ifup it, and netvsc_open() will UP the VF interface,
> and netvsc_netdev_event() is called for the VF with event ==
> NETDEV_POST_INIT (?) and NETDEV_CHANGE, and the data path is
> switched to the VF.
> 
> If my understanding is correct, I think in the case of 'context' ==
> VF_REG_IN_PROBE, I suspect the "Align MTU of VF with master"
> and the "sync address list from ndev to VF" in __netvsc_vf_setup() are
> omitted? If so, should this be fixed? e.g. Not sure if the below is an issue or not:
> 1) a VF is bound to a NetVSC interface, and a user sets the MTUs to 1024.
> 2) rmmod hv_netvsc
> 3) modprobe hv_netvsc
> 4) the netvsc interface uses MTU=1500 (the default), and the VF still uses 1024.
> 
> > @@ -2597,6 +2604,34 @@ static int netvsc_probe(struct hv_device *dev,
> >  	}
> > 
> >  	list_add(&net_device_ctx->list, &netvsc_dev_list);
> > +
> > +	/* When the hv_netvsc driver is removed and readded, the
> 
> s/removed and readded/unloaded and reloaded/
> 
> > +	 * NET_DEVICE_REGISTER for the vf device is replayed before
> > probe
> > +	 * is complete. This is because register_netdevice_notifier() gets
> > +	 * registered before vmbus_driver_register() so that callback func
> > +	 * is set before probe and we don't miss events like
> > NETDEV_POST_INIT
> > +	 * So, in this section we try to register each matching
> 
> Looks like there are spaces at the end of the line. We can move up a few words
> from the next line :-)
> 
> s/each matching/the matching/
> A NetVSC interface has only 1 matching VF device.
> 
> > +	 * vf device that is present as a netdevice, knowing that it's register
> 
> s/it's/its/
> 
> > +	 * call is not processed in the netvsc_netdev_notifier(as probing is
> > +	 * progress and get_netvsc_byslot fails).
> > +	 */
> > +	for_each_netdev(dev_net(net), vf_netdev) {
> > +		if (vf_netdev->netdev_ops == &device_ops)
> > +			continue;
> > +
> > +		if (vf_netdev->type != ARPHRD_ETHER)
> > +			continue;
> > +
> > +		if (is_vlan_dev(vf_netdev))
> > +			continue;
> > +
> > +		if (netif_is_bond_master(vf_netdev))
> > +			continue;
> 
> The code above is duplicated from netvsc_netdev_event().
> Can we add a new helper function is_matching_vf() to avoid the duplication?
Sure, I will do that. Thanks
> 
> > +		netvsc_prepare_bonding(vf_netdev);
> > +		netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE);
> > +		__netvsc_vf_setup(net, vf_netdev);
> 
> add a "break;' ?
With MANA devices and multiport support there, the individual ports are also net_devices.
Wouldn't this be needed for such scenario(where we have multiple mana port net devices) to
register them all?
> 
> > +	}
> >  	rtnl_unlock();
  
Haiyang Zhang Jan. 31, 2024, 4:46 p.m. UTC | #7
> -----Original Message-----
> From: Shradha Gupta <shradhagupta@linux.microsoft.com>
> Sent: Wednesday, January 31, 2024 2:54 AM
> To: Dexuan Cui <decui@microsoft.com>
> Cc: KY Srinivasan <kys@microsoft.com>; Haiyang Zhang
> <haiyangz@microsoft.com>; Wei Liu <wei.liu@kernel.org>; David S. Miller
> <davem@davemloft.net>; Eric Dumazet <edumazet@google.com>; Jakub Kicinski
> <kuba@kernel.org>; Paolo Abeni <pabeni@redhat.com>; Wojciech Drewek
> <wojciech.drewek@intel.com>; linux-hyperv@vger.kernel.org;
> netdev@vger.kernel.org; linux-kernel@vger.kernel.org; Shradha Gupta
> <shradhagupta@microsoft.com>; stable@vger.kernel.org
> Subject: Re: [PATCH] hv_netvsc:Register VF in netvsc_probe if
> NET_DEVICE_REGISTER missed
> 
> On Tue, Jan 30, 2024 at 08:13:21PM +0000, Dexuan Cui wrote:
> > > From: Shradha Gupta <shradhagupta@linux.microsoft.com>
> > > Sent: Monday, January 29, 2024 11:19 PM
> > >  [...]
> > > If hv_netvsc driver is removed and reloaded, the NET_DEVICE_REGISTER
> >
> > s/removed/unloaded/
> > unloaded looks more accurate to me :-)
> >
> > > [...]
> > > Tested-on: Ubuntu22
> > > Testcases: LISA testsuites
> > > 	   verify_reload_hyperv_modules, perf_tcp_ntttcp_sriov
> > IMO the 3 lines can be removed: this bug is not specific to Ubuntu, and
> the
> > test case names don't provide extra value to help understand the issue
> > here and they might cause more questions unnecessarily, e.g. what's
> LISA,
> > what exactly do the test cases do.
> >
> > > +/* Macros to define the context of vf registration */
> > > +#define VF_REG_IN_PROBE		1
> > > +#define VF_REG_IN_RECV_CBACK	2
> >
> > I think VF_REG_IN_NOTIFIER is a better name?
> > RECV_CBALL looks inaccurate to me.
> >
> > > @@ -2205,8 +2209,11 @@ static int netvsc_vf_join(struct net_device
> > > *vf_netdev,
> > >  			   ndev->name, ret);
> > >  		goto upper_link_failed;
> > >  	}
> > > -
> > > -	schedule_delayed_work(&ndev_ctx->vf_takeover,
> > > VF_TAKEOVER_INT);
> > > +	/* If this registration is called from probe context vf_takeover
> > > +	 * is taken care of later in probe itself.
> > I suspect "later in probe itself" is not accurate.
> > If 'context' is VF_REG_IN_PROBE, I suppose what happens here is:
> > after netvsc_probe() finishes, the netvsc interface becomes available,
> > so the user space will ifup it, and netvsc_open() will UP the VF
> interface,
> > and netvsc_netdev_event() is called for the VF with event ==
> > NETDEV_POST_INIT (?) and NETDEV_CHANGE, and the data path is
> > switched to the VF.
> >
> > If my understanding is correct, I think in the case of 'context' ==
> > VF_REG_IN_PROBE, I suspect the "Align MTU of VF with master"
> > and the "sync address list from ndev to VF" in __netvsc_vf_setup() are
> > omitted? If so, should this be fixed? e.g. Not sure if the below is an
> issue or not:
> > 1) a VF is bound to a NetVSC interface, and a user sets the MTUs to
> 1024.
> > 2) rmmod hv_netvsc
> > 3) modprobe hv_netvsc
> > 4) the netvsc interface uses MTU=1500 (the default), and the VF still
> uses 1024.
> >
> > > @@ -2597,6 +2604,34 @@ static int netvsc_probe(struct hv_device *dev,
> > >  	}
> > >
> > >  	list_add(&net_device_ctx->list, &netvsc_dev_list);
> > > +
> > > +	/* When the hv_netvsc driver is removed and readded, the
> >
> > s/removed and readded/unloaded and reloaded/
> >
> > > +	 * NET_DEVICE_REGISTER for the vf device is replayed before
> > > probe
> > > +	 * is complete. This is because register_netdevice_notifier() gets
> > > +	 * registered before vmbus_driver_register() so that callback func
> > > +	 * is set before probe and we don't miss events like
> > > NETDEV_POST_INIT
> > > +	 * So, in this section we try to register each matching
> >
> > Looks like there are spaces at the end of the line. We can move up a
> few words
> > from the next line :-)
> >
> > s/each matching/the matching/
> > A NetVSC interface has only 1 matching VF device.
> >
> > > +	 * vf device that is present as a netdevice, knowing that it's
> register
> >
> > s/it's/its/
> >
> > > +	 * call is not processed in the netvsc_netdev_notifier(as probing
> is
> > > +	 * progress and get_netvsc_byslot fails).
> > > +	 */
> > > +	for_each_netdev(dev_net(net), vf_netdev) {
> > > +		if (vf_netdev->netdev_ops == &device_ops)
> > > +			continue;
> > > +
> > > +		if (vf_netdev->type != ARPHRD_ETHER)
> > > +			continue;
> > > +
> > > +		if (is_vlan_dev(vf_netdev))
> > > +			continue;
> > > +
> > > +		if (netif_is_bond_master(vf_netdev))
> > > +			continue;
> >
> > The code above is duplicated from netvsc_netdev_event().
> > Can we add a new helper function is_matching_vf() to avoid the
> duplication?
> Sure, I will do that. Thanks
> >
> > > +		netvsc_prepare_bonding(vf_netdev);
> > > +		netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE);
> > > +		__netvsc_vf_setup(net, vf_netdev);
> >
> > add a "break;' ?
> With MANA devices and multiport support there, the individual ports are
> also net_devices.
> Wouldn't this be needed for such scenario(where we have multiple mana
> port net devices) to
> register them all?

Each device has separate probe() call, so only one VF will match in one 
netvsc_probe().

netvsc_prepare_bonding() &  netvsc_register_vf() have 
get_netvsc_byslot(vf_netdev), but __netvsc_vf_setup() doesn't have. So,
in case of multi-Vfs, this code will run "this" netvsc NIC with multiple VFs by 
__netvsc_vf_setup() which isn't correct.

You need to add the following lines before netvsc_prepare_bonding(vf_netdev)
in netvsc_probe() to skip non-matching VFs:

if (net != get_netvsc_byslot(vf_netdev))
	continue;

Thanks,
- Haiyang
  
Dexuan Cui Jan. 31, 2024, 5:40 p.m. UTC | #8
> From: Haiyang Zhang <haiyangz@microsoft.com>
> Sent: Wednesday, January 31, 2024 8:46 AM
>  [...]
> > From: Shradha Gupta <shradhagupta@linux.microsoft.com>
> > Sent: Wednesday, January 31, 2024 2:54 AM
> > > [...]
> > > > +		netvsc_prepare_bonding(vf_netdev);
> > > > +		netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE);
> > > > +		__netvsc_vf_setup(net, vf_netdev);
> > >
> > > add a "break;' ?
> > With MANA devices and multiport support there, the individual ports are
> > also net_devices.
> > Wouldn't this be needed for such scenario(where we have multiple mana
> > port net devices) to
> > register them all?
> 
> Each device has separate probe() call, so only one VF will match in one
> netvsc_probe().
> 
> netvsc_prepare_bonding() &  netvsc_register_vf() have
> get_netvsc_byslot(vf_netdev), but __netvsc_vf_setup() doesn't have. So,
> in case of multi-Vfs, this code will run "this" netvsc NIC with multiple VFs by
> __netvsc_vf_setup() which isn't correct.
> 
> You need to add the following lines before
> netvsc_prepare_bonding(vf_netdev)
> in netvsc_probe() to skip non-matching VFs:
> 
> if (net != get_netvsc_byslot(vf_netdev))
> 	continue;

Haiyang is correct.
I think it's still good to add a "break;", e.g. my understanding is something
like the below (this is untested):

+static struct net_device *get_matching_netvsc_dev(net_device *event_ndev)
+{
+       /* Skip NetVSC interfaces */
+       if (event_ndev->netdev_ops == &device_ops)
+               return NULL;
+
+       /* Avoid non-Ethernet type devices */
+       if (event_ndev->type != ARPHRD_ETHER)
+               return NULL;
+
+       /* Avoid Vlan dev with same MAC registering as VF */
+       if (is_vlan_dev(event_ndev))
+               return NULL;
+
+       /* Avoid Bonding master dev with same MAC registering as VF */
+       if (netif_is_bond_master(event_ndev))
+               return NULL;
+
+       return get_netvsc_byslot(event_ndev);
+}

+	for_each_netdev(dev_net(net), vf_netdev) {
+ 		if (get_matching_netvsc_dev(event_dev) != net)
+			continue;
+
+		netvsc_prepare_bonding(vf_netdev);
+		netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE);
+		__netvsc_vf_setup(net, vf_netdev);
+
+		break;
+	}

We can also use get_matching_netvsc_dev() in netvsc_netdev_event().

BTW, please add a space between "hv_netvsc:" and "Register" in the Subject.
  
Haiyang Zhang Jan. 31, 2024, 8:05 p.m. UTC | #9
> -----Original Message-----
> From: Dexuan Cui <decui@microsoft.com>
> Sent: Wednesday, January 31, 2024 12:40 PM
> To: Haiyang Zhang <haiyangz@microsoft.com>; Shradha Gupta
> <shradhagupta@linux.microsoft.com>
> Cc: KY Srinivasan <kys@microsoft.com>; Wei Liu <wei.liu@kernel.org>;
> David S. Miller <davem@davemloft.net>; Eric Dumazet
> <edumazet@google.com>; Jakub Kicinski <kuba@kernel.org>; Paolo Abeni
> <pabeni@redhat.com>; Wojciech Drewek <wojciech.drewek@intel.com>; linux-
> hyperv@vger.kernel.org; netdev@vger.kernel.org; linux-
> kernel@vger.kernel.org; Shradha Gupta <shradhagupta@microsoft.com>;
> stable@vger.kernel.org
> Subject: RE: [PATCH] hv_netvsc:Register VF in netvsc_probe if
> NET_DEVICE_REGISTER missed
> 
> > From: Haiyang Zhang <haiyangz@microsoft.com>
> > Sent: Wednesday, January 31, 2024 8:46 AM
> >  [...]
> > > From: Shradha Gupta <shradhagupta@linux.microsoft.com>
> > > Sent: Wednesday, January 31, 2024 2:54 AM
> > > > [...]
> > > > > +		netvsc_prepare_bonding(vf_netdev);
> > > > > +		netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE);
> > > > > +		__netvsc_vf_setup(net, vf_netdev);
> > > >
> > > > add a "break;' ?
> > > With MANA devices and multiport support there, the individual ports
> are
> > > also net_devices.
> > > Wouldn't this be needed for such scenario(where we have multiple mana
> > > port net devices) to
> > > register them all?
> >
> > Each device has separate probe() call, so only one VF will match in one
> > netvsc_probe().
> >
> > netvsc_prepare_bonding() &  netvsc_register_vf() have
> > get_netvsc_byslot(vf_netdev), but __netvsc_vf_setup() doesn't have. So,
> > in case of multi-Vfs, this code will run "this" netvsc NIC with
> multiple VFs by
> > __netvsc_vf_setup() which isn't correct.
> >
> > You need to add the following lines before
> > netvsc_prepare_bonding(vf_netdev)
> > in netvsc_probe() to skip non-matching VFs:
> >
> > if (net != get_netvsc_byslot(vf_netdev))
> > 	continue;
> 
> Haiyang is correct.
> I think it's still good to add a "break;", e.g. my understanding is
> something
> like the below (this is untested):
> 
> +static struct net_device *get_matching_netvsc_dev(net_device
> *event_ndev)
> +{
> +       /* Skip NetVSC interfaces */
> +       if (event_ndev->netdev_ops == &device_ops)
> +               return NULL;
> +
> +       /* Avoid non-Ethernet type devices */
> +       if (event_ndev->type != ARPHRD_ETHER)
> +               return NULL;
> +
> +       /* Avoid Vlan dev with same MAC registering as VF */
> +       if (is_vlan_dev(event_ndev))
> +               return NULL;
> +
> +       /* Avoid Bonding master dev with same MAC registering as VF */
> +       if (netif_is_bond_master(event_ndev))
> +               return NULL;
> +
> +       return get_netvsc_byslot(event_ndev);
> +}

Looks good. 
But, like you said before, the four if's can be moved into a new function,
and shared by two callers: netvsc_probe() & netvsc_netdev_event().


> 
> +	for_each_netdev(dev_net(net), vf_netdev) {
> + 		if (get_matching_netvsc_dev(event_dev) != net)
> +			continue;
> +
> +		netvsc_prepare_bonding(vf_netdev);
> +		netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE);
> +		__netvsc_vf_setup(net, vf_netdev);
> +
> +		break;
> +	}
> 
> We can also use get_matching_netvsc_dev() in netvsc_netdev_event().

netvsc_netdev_event() >> netvsc_unregister_vf() uses get_netvsc_byref(vf_netdev)
instead of get_netvsc_byslot().
So probably just re-factoring the four if's is better.

Thanks,
-Haiyang
  
Jakub Kicinski Feb. 1, 2024, 12:30 a.m. UTC | #10
On Tue, 30 Jan 2024 22:09:57 -0800 Shradha Gupta wrote:
> This patch applies to net, which is missed in the subject. I will fix
> this in the new version of the patch. Thanks


$ git checkout net/main 
[...]
$ git am raw
Applying: hv_netvsc:Register VF in netvsc_probe if NET_DEVICE_REGISTER missed
error: patch failed: drivers/net/hyperv/netvsc_drv.c:42
error: drivers/net/hyperv/netvsc_drv.c: patch does not apply
Patch failed at 0001 hv_netvsc:Register VF in netvsc_probe if NET_DEVICE_REGISTER missed
hint: Use 'git am --show-current-patch=diff' to see the failed patch
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".
  

Patch

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 706ea5263e87..25c4dc9cc4bd 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -42,6 +42,10 @@ 
 #define LINKCHANGE_INT (2 * HZ)
 #define VF_TAKEOVER_INT (HZ / 10)
 
+/* Macros to define the context of vf registration */
+#define VF_REG_IN_PROBE		1
+#define VF_REG_IN_RECV_CBACK	2
+
 static unsigned int ring_size __ro_after_init = 128;
 module_param(ring_size, uint, 0444);
 MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
@@ -2183,7 +2187,7 @@  static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb)
 }
 
 static int netvsc_vf_join(struct net_device *vf_netdev,
-			  struct net_device *ndev)
+			  struct net_device *ndev, int context)
 {
 	struct net_device_context *ndev_ctx = netdev_priv(ndev);
 	int ret;
@@ -2205,8 +2209,11 @@  static int netvsc_vf_join(struct net_device *vf_netdev,
 			   ndev->name, ret);
 		goto upper_link_failed;
 	}
-
-	schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);
+	/* If this registration is called from probe context vf_takeover
+	 * is taken care of later in probe itself.
+	 */
+	if (context == VF_REG_IN_RECV_CBACK)
+		schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);
 
 	call_netdevice_notifiers(NETDEV_JOIN, vf_netdev);
 
@@ -2344,7 +2351,7 @@  static int netvsc_prepare_bonding(struct net_device *vf_netdev)
 	return NOTIFY_DONE;
 }
 
-static int netvsc_register_vf(struct net_device *vf_netdev)
+static int netvsc_register_vf(struct net_device *vf_netdev, int context)
 {
 	struct net_device_context *net_device_ctx;
 	struct netvsc_device *netvsc_dev;
@@ -2384,7 +2391,7 @@  static int netvsc_register_vf(struct net_device *vf_netdev)
 
 	netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
 
-	if (netvsc_vf_join(vf_netdev, ndev) != 0)
+	if (netvsc_vf_join(vf_netdev, ndev, context) != 0)
 		return NOTIFY_DONE;
 
 	dev_hold(vf_netdev);
@@ -2485,7 +2492,7 @@  static int netvsc_unregister_vf(struct net_device *vf_netdev)
 static int netvsc_probe(struct hv_device *dev,
 			const struct hv_vmbus_device_id *dev_id)
 {
-	struct net_device *net = NULL;
+	struct net_device *net = NULL, *vf_netdev;
 	struct net_device_context *net_device_ctx;
 	struct netvsc_device_info *device_info = NULL;
 	struct netvsc_device *nvdev;
@@ -2597,6 +2604,34 @@  static int netvsc_probe(struct hv_device *dev,
 	}
 
 	list_add(&net_device_ctx->list, &netvsc_dev_list);
+
+	/* When the hv_netvsc driver is removed and readded, the
+	 * NET_DEVICE_REGISTER for the vf device is replayed before probe
+	 * is complete. This is because register_netdevice_notifier() gets
+	 * registered before vmbus_driver_register() so that callback func
+	 * is set before probe and we don't miss events like NETDEV_POST_INIT
+	 * So, in this section we try to register each matching
+	 * vf device that is present as a netdevice, knowing that it's register
+	 * call is not processed in the netvsc_netdev_notifier(as probing is
+	 * progress and get_netvsc_byslot fails).
+	 */
+	for_each_netdev(dev_net(net), vf_netdev) {
+		if (vf_netdev->netdev_ops == &device_ops)
+			continue;
+
+		if (vf_netdev->type != ARPHRD_ETHER)
+			continue;
+
+		if (is_vlan_dev(vf_netdev))
+			continue;
+
+		if (netif_is_bond_master(vf_netdev))
+			continue;
+
+		netvsc_prepare_bonding(vf_netdev);
+		netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE);
+		__netvsc_vf_setup(net, vf_netdev);
+	}
 	rtnl_unlock();
 
 	netvsc_devinfo_put(device_info);
@@ -2773,7 +2808,7 @@  static int netvsc_netdev_event(struct notifier_block *this,
 	case NETDEV_POST_INIT:
 		return netvsc_prepare_bonding(event_dev);
 	case NETDEV_REGISTER:
-		return netvsc_register_vf(event_dev);
+		return netvsc_register_vf(event_dev, VF_REG_IN_RECV_CBACK);
 	case NETDEV_UNREGISTER:
 		return netvsc_unregister_vf(event_dev);
 	case NETDEV_UP: