[net-next,v7,1/8] bnxt_en: Add auxiliary driver support

Message ID 20230112202939.19562-2-ajit.khaparde@broadcom.com
State New
Headers
Series Add Auxiliary driver support |

Commit Message

Ajit Khaparde Jan. 12, 2023, 8:29 p.m. UTC
  Add auxiliary driver support.
An auxiliary device will be created if the hardware indicates
support for RDMA.
The bnxt_ulp_probe() function has been removed and a new
bnxt_rdma_aux_device_add() function has been added.
The bnxt_free_msix_vecs() and bnxt_req_msix_vecs() will now hold
the RTNL lock when they call the bnxt_close_nic() and bnxt_open_nic()
since the device close and open need to be protected under RTNL lock.
The operations between the bnxt_en and bnxt_re will be protected
using the en_ops_lock.
This will be used by the bnxt_re driver in a follow-on patch
to create ROCE interfaces.

Signed-off-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
Reviewed-by: Selvin Xavier <selvin.xavier@broadcom.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/net/ethernet/broadcom/Kconfig         |   1 +
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     |   8 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt.h     |   8 +
 drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 164 +++++++++++++++---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h |   4 +-
 5 files changed, 158 insertions(+), 27 deletions(-)
  

Comments

Jakub Kicinski Jan. 14, 2023, 6:10 a.m. UTC | #1
On Thu, 12 Jan 2023 12:29:32 -0800 Ajit Khaparde wrote:
> Add auxiliary driver support.
> An auxiliary device will be created if the hardware indicates
> support for RDMA.
> The bnxt_ulp_probe() function has been removed and a new
> bnxt_rdma_aux_device_add() function has been added.
> The bnxt_free_msix_vecs() and bnxt_req_msix_vecs() will now hold
> the RTNL lock when they call the bnxt_close_nic()and bnxt_open_nic()
> since the device close and open need to be protected under RTNL lock.
> The operations between the bnxt_en and bnxt_re will be protected
> using the en_ops_lock.
> This will be used by the bnxt_re driver in a follow-on patch
> to create ROCE interfaces.

> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> @@ -13178,6 +13178,9 @@ static void bnxt_remove_one(struct pci_dev *pdev)
>  	struct net_device *dev = pci_get_drvdata(pdev);
>  	struct bnxt *bp = netdev_priv(dev);
>  
> +	bnxt_rdma_aux_device_uninit(bp);
> +	bnxt_aux_dev_free(bp);

You still free bp->aux_dev synchronously..

> +void bnxt_aux_dev_free(struct bnxt *bp)
> +{
> +	kfree(bp->aux_dev);

.. here. Which is called on .remove of the PCI device.

> +	bp->aux_dev = NULL;
> +}
> +
> +static struct bnxt_aux_dev *bnxt_aux_dev_alloc(struct bnxt *bp)
> +{
> +	return kzalloc(sizeof(struct bnxt_aux_dev), GFP_KERNEL);
> +}
> +
> +void bnxt_rdma_aux_device_uninit(struct bnxt *bp)
> +{
> +	struct bnxt_aux_dev *bnxt_adev;
> +	struct auxiliary_device *adev;
> +
> +	/* Skip if no auxiliary device init was done. */
> +	if (!(bp->flags & BNXT_FLAG_ROCE_CAP))
> +		return;
> +
> +	bnxt_adev = bp->aux_dev;
> +	adev = &bnxt_adev->aux_dev;
> +	auxiliary_device_delete(adev);
> +	auxiliary_device_uninit(adev);
> +	if (bnxt_adev->id >= 0)
> +		ida_free(&bnxt_aux_dev_ids, bnxt_adev->id);
> +}
> +
> +static void bnxt_aux_dev_release(struct device *dev)
> +{
> +	struct bnxt_aux_dev *bnxt_adev =
> +		container_of(dev, struct bnxt_aux_dev, aux_dev.dev);
> +	struct bnxt *bp = netdev_priv(bnxt_adev->edev->net);
> +
> +	bnxt_adev->edev->en_ops = NULL;
> +	kfree(bnxt_adev->edev);

And yet the reference-counted "release" function accesses bp->aux_dev
as if it must still exist.

This seems odd to me - why do we need refcounting on devices at all 
if we can free them synchronously? To be clear - I'm not sure this is
wrong, just seems odd.

> +	bnxt_adev->edev = NULL;
> +	bp->edev = NULL;
> +}
  
Ajit Khaparde Jan. 14, 2023, 8:39 p.m. UTC | #2
On Fri, Jan 13, 2023 at 10:10 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Thu, 12 Jan 2023 12:29:32 -0800 Ajit Khaparde wrote:
> > Add auxiliary driver support.
> > An auxiliary device will be created if the hardware indicates
> > support for RDMA.
> > The bnxt_ulp_probe() function has been removed and a new
> > bnxt_rdma_aux_device_add() function has been added.
> > The bnxt_free_msix_vecs() and bnxt_req_msix_vecs() will now hold
> > the RTNL lock when they call the bnxt_close_nic()and bnxt_open_nic()
> > since the device close and open need to be protected under RTNL lock.
> > The operations between the bnxt_en and bnxt_re will be protected
> > using the en_ops_lock.
> > This will be used by the bnxt_re driver in a follow-on patch
> > to create ROCE interfaces.
>
> > --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> > +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> > @@ -13178,6 +13178,9 @@ static void bnxt_remove_one(struct pci_dev *pdev)
> >       struct net_device *dev = pci_get_drvdata(pdev);
> >       struct bnxt *bp = netdev_priv(dev);
> >
> > +     bnxt_rdma_aux_device_uninit(bp);
> > +     bnxt_aux_dev_free(bp);
>
> You still free bp->aux_dev synchronously..
>
> > +void bnxt_aux_dev_free(struct bnxt *bp)
> > +{
> > +     kfree(bp->aux_dev);
>
> .. here. Which is called on .remove of the PCI device.
>
> > +     bp->aux_dev = NULL;
> > +}
> > +
> > +static struct bnxt_aux_dev *bnxt_aux_dev_alloc(struct bnxt *bp)
> > +{
> > +     return kzalloc(sizeof(struct bnxt_aux_dev), GFP_KERNEL);
> > +}
> > +
> > +void bnxt_rdma_aux_device_uninit(struct bnxt *bp)
> > +{
> > +     struct bnxt_aux_dev *bnxt_adev;
> > +     struct auxiliary_device *adev;
> > +
> > +     /* Skip if no auxiliary device init was done. */
> > +     if (!(bp->flags & BNXT_FLAG_ROCE_CAP))
> > +             return;
> > +
> > +     bnxt_adev = bp->aux_dev;
> > +     adev = &bnxt_adev->aux_dev;
> > +     auxiliary_device_delete(adev);
> > +     auxiliary_device_uninit(adev);
> > +     if (bnxt_adev->id >= 0)
> > +             ida_free(&bnxt_aux_dev_ids, bnxt_adev->id);
> > +}
> > +
> > +static void bnxt_aux_dev_release(struct device *dev)
> > +{
> > +     struct bnxt_aux_dev *bnxt_adev =
> > +             container_of(dev, struct bnxt_aux_dev, aux_dev.dev);
> > +     struct bnxt *bp = netdev_priv(bnxt_adev->edev->net);
> > +
> > +     bnxt_adev->edev->en_ops = NULL;
> > +     kfree(bnxt_adev->edev);
>
> And yet the reference counted "release" function accesses the bp->adev
> like it must exist.
>
> This seems odd to me - why do we need refcounting on devices at all
> if we can free them synchronously? To be clear - I'm not sure this is
> wrong, just seems odd.
I followed the existing implementations in that regard. Thanks

>
> > +     bnxt_adev->edev = NULL;
> > +     bp->edev = NULL;
> > +}
  
Jakub Kicinski Jan. 17, 2023, 4:56 a.m. UTC | #3
On Sat, 14 Jan 2023 12:39:09 -0800 Ajit Khaparde wrote:
> > > +static void bnxt_aux_dev_release(struct device *dev)
> > > +{
> > > +     struct bnxt_aux_dev *bnxt_adev =
> > > +             container_of(dev, struct bnxt_aux_dev, aux_dev.dev);
> > > +     struct bnxt *bp = netdev_priv(bnxt_adev->edev->net);
> > > +
> > > +     bnxt_adev->edev->en_ops = NULL;
> > > +     kfree(bnxt_adev->edev);  
> >
> > And yet the reference counted "release" function accesses the bp->adev
> > like it must exist.
> >
> > This seems odd to me - why do we need refcounting on devices at all
> > if we can free them synchronously? To be clear - I'm not sure this is
> > wrong, just seems odd.  
> I followed the existing implementations in that regard. Thanks

Leon, could you take a look? Is there no problem in assuming bnxt_adev
is still around in the release function?
  
Leon Romanovsky Jan. 17, 2023, 12:31 p.m. UTC | #4
On Mon, Jan 16, 2023 at 08:56:25PM -0800, Jakub Kicinski wrote:
> On Sat, 14 Jan 2023 12:39:09 -0800 Ajit Khaparde wrote:
> > > > +static void bnxt_aux_dev_release(struct device *dev)
> > > > +{
> > > > +     struct bnxt_aux_dev *bnxt_adev =
> > > > +             container_of(dev, struct bnxt_aux_dev, aux_dev.dev);
> > > > +     struct bnxt *bp = netdev_priv(bnxt_adev->edev->net);
> > > > +
> > > > +     bnxt_adev->edev->en_ops = NULL;
> > > > +     kfree(bnxt_adev->edev);  
> > >
> > > And yet the reference counted "release" function accesses the bp->adev
> > > like it must exist.
> > >
> > > This seems odd to me - why do we need refcounting on devices at all
> > > if we can free them synchronously? To be clear - I'm not sure this is
> > > wrong, just seems odd.  
> > I followed the existing implementations in that regard. Thanks
> 
> Leon, could you take a look? Is there no problem in assuming bnxt_adev
> is still around in the release function?

You caught a real bug. The auxdev idea is very simple - it needs to
behave like driver core, but in the driver itself.

As such, bnxt_aux_dev_free() shouldn't be called after bnxt_rdma_aux_device_uninit().
The device will be released through auxiliary_device_uninit().

BTW, line 325 from below shouldn't exist either.

  312 void bnxt_rdma_aux_device_uninit(struct bnxt *bp)
  313 {
...
  325         if (bnxt_adev->id >= 0)
  326                 ida_free(&bnxt_aux_dev_ids, bnxt_adev->id);

And the one-line bnxt_aux_dev_alloc() needs to be deleted too.

Thanks
  
Leon Romanovsky Jan. 17, 2023, 5:18 p.m. UTC | #5
On Tue, Jan 17, 2023 at 02:31:01PM +0200, Leon Romanovsky wrote:
> On Mon, Jan 16, 2023 at 08:56:25PM -0800, Jakub Kicinski wrote:
> > On Sat, 14 Jan 2023 12:39:09 -0800 Ajit Khaparde wrote:
> > > > > +static void bnxt_aux_dev_release(struct device *dev)
> > > > > +{
> > > > > +     struct bnxt_aux_dev *bnxt_adev =
> > > > > +             container_of(dev, struct bnxt_aux_dev, aux_dev.dev);
> > > > > +     struct bnxt *bp = netdev_priv(bnxt_adev->edev->net);
> > > > > +
> > > > > +     bnxt_adev->edev->en_ops = NULL;
> > > > > +     kfree(bnxt_adev->edev);  
> > > >
> > > > And yet the reference counted "release" function accesses the bp->adev
> > > > like it must exist.
> > > >
> > > > This seems odd to me - why do we need refcounting on devices at all
> > > > if we can free them synchronously? To be clear - I'm not sure this is
> > > > wrong, just seems odd.  
> > > I followed the existing implementations in that regard. Thanks
> > 
> > Leon, could you take a look? Is there no problem in assuming bnxt_adev
> > is still around in the release function?
> 
> You caught a real bug. The auxdev idea is very simple - it needs to
> behave like driver core, but in the driver itself.

BTW, this is a classic example of why assigning NULL to pointers after
release is bad practice: it hides this class of errors.

+void bnxt_aux_dev_free(struct bnxt *bp)
+{
+       kfree(bp->aux_dev);
+       bp->aux_dev = NULL;
+}

Thanks
  
Ajit Khaparde Jan. 17, 2023, 7:33 p.m. UTC | #6
On Tue, Jan 17, 2023 at 4:31 AM Leon Romanovsky <leonro@nvidia.com> wrote:
>
> On Mon, Jan 16, 2023 at 08:56:25PM -0800, Jakub Kicinski wrote:
> > On Sat, 14 Jan 2023 12:39:09 -0800 Ajit Khaparde wrote:
> > > > > +static void bnxt_aux_dev_release(struct device *dev)
> > > > > +{
> > > > > +     struct bnxt_aux_dev *bnxt_adev =
> > > > > +             container_of(dev, struct bnxt_aux_dev, aux_dev.dev);
> > > > > +     struct bnxt *bp = netdev_priv(bnxt_adev->edev->net);
> > > > > +
> > > > > +     bnxt_adev->edev->en_ops = NULL;
> > > > > +     kfree(bnxt_adev->edev);
> > > >
> > > > And yet the reference counted "release" function accesses the bp->adev
> > > > like it must exist.
> > > >
> > > > This seems odd to me - why do we need refcounting on devices at all
> > > > if we can free them synchronously? To be clear - I'm not sure this is
> > > > wrong, just seems odd.
> > > I followed the existing implementations in that regard. Thanks
> >
> > Leon, could you take a look? Is there no problem in assuming bnxt_adev
> > is still around in the release function?
>
> You caught a real bug. The auxdev idea is very simple - it needs to
> behave like driver core, but in the driver itself.
>
> As such, bnxt_aux_dev_free() shouldn't be called after bnxt_rdma_aux_device_uninit().
> Device will be released through auxiliary_device_uninit();
>
> BTW, line 325 from below shouldn't exist too.
>
>   312 void bnxt_rdma_aux_device_uninit(struct bnxt *bp)
>   313 {
> ...
>   325         if (bnxt_adev->id >= 0)
>   326                 ida_free(&bnxt_aux_dev_ids, bnxt_adev->id);
>
> And one line bnxt_aux_dev_alloc() needs to be deleted too.
>
> Thanks
Thanks.
We are reviewing the comments and will have an update soon.
  
Ajit Khaparde Jan. 18, 2023, 6:32 a.m. UTC | #7
On Tue, Jan 17, 2023 at 4:31 AM Leon Romanovsky <leonro@nvidia.com> wrote:
>
> On Mon, Jan 16, 2023 at 08:56:25PM -0800, Jakub Kicinski wrote:
> > On Sat, 14 Jan 2023 12:39:09 -0800 Ajit Khaparde wrote:
> > > > > +static void bnxt_aux_dev_release(struct device *dev)
> > > > > +{
> > > > > +     struct bnxt_aux_dev *bnxt_adev =
> > > > > +             container_of(dev, struct bnxt_aux_dev, aux_dev.dev);
> > > > > +     struct bnxt *bp = netdev_priv(bnxt_adev->edev->net);
> > > > > +
> > > > > +     bnxt_adev->edev->en_ops = NULL;
> > > > > +     kfree(bnxt_adev->edev);
> > > >
> > > > And yet the reference counted "release" function accesses the bp->adev
> > > > like it must exist.
> > > >
> > > > This seems odd to me - why do we need refcounting on devices at all
> > > > if we can free them synchronously? To be clear - I'm not sure this is
> > > > wrong, just seems odd.
> > > I followed the existing implementations in that regard. Thanks
> >
> > Leon, could you take a look? Is there no problem in assuming bnxt_adev
> > is still around in the release function?
>
> You caught a real bug. The auxdev idea is very simple - it needs to
> behave like driver core, but in the driver itself.
>
> As such, bnxt_aux_dev_free() shouldn't be called after bnxt_rdma_aux_device_uninit().
> Device will be released through auxiliary_device_uninit();
Ok. But..
bnxt_aux_dev_free() is actually freeing up the private memory allocated
for holding the pointer returned by my_aux_dev_alloc(xxx);
The aux device is freed via the auxiliary_device_uninit only.

>
> BTW, line 325 from below shouldn't exist too.
ACK

>
>   312 void bnxt_rdma_aux_device_uninit(struct bnxt *bp)
>   313 {
> ...
>   325         if (bnxt_adev->id >= 0)
>   326                 ida_free(&bnxt_aux_dev_ids, bnxt_adev->id);
>
> And one line bnxt_aux_dev_alloc() needs to be deleted too.
To avoid confusion, I will refactor and rename the code handling
auxiliary_device alloc, cleanup and the alloc, cleanup of priv
pointers used for bookkeeping.

I hope the new patchset will address the concerns raised.

>
> Thanks
  

Patch

diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index f4ca0c6c0f51..948586bf1b5b 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -213,6 +213,7 @@  config BNXT
 	select NET_DEVLINK
 	select PAGE_POOL
 	select DIMLIB
+	select AUXILIARY_BUS
 	help
 	  This driver supports Broadcom NetXtreme-C/E 10/25/40/50 gigabit
 	  Ethernet cards.  To compile this driver as a module, choose M here:
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 16ce7a90610c..c5cbf2307e07 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -13178,6 +13178,9 @@  static void bnxt_remove_one(struct pci_dev *pdev)
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct bnxt *bp = netdev_priv(dev);
 
+	bnxt_rdma_aux_device_uninit(bp);
+	bnxt_aux_dev_free(bp);
+
 	if (BNXT_PF(bp))
 		bnxt_sriov_disable(bp);
 
@@ -13776,11 +13779,13 @@  static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	bnxt_dl_fw_reporters_create(bp);
 
+	bnxt_rdma_aux_device_init(bp);
+
 	bnxt_print_device_info(bp);
 
 	pci_save_state(pdev);
-	return 0;
 
+	return 0;
 init_err_cleanup:
 	bnxt_dl_unregister(bp);
 init_err_dl:
@@ -13824,7 +13829,6 @@  static void bnxt_shutdown(struct pci_dev *pdev)
 	if (netif_running(dev))
 		dev_close(dev);
 
-	bnxt_ulp_shutdown(bp);
 	bnxt_clear_int_mode(bp);
 	pci_disable_device(pdev);
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 5163ef4a49ea..4df2da81708e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -24,6 +24,7 @@ 
 #include <linux/interrupt.h>
 #include <linux/rhashtable.h>
 #include <linux/crash_dump.h>
+#include <linux/auxiliary_bus.h>
 #include <net/devlink.h>
 #include <net/dst_metadata.h>
 #include <net/xdp.h>
@@ -1631,6 +1632,12 @@  struct bnxt_fw_health {
 #define BNXT_FW_IF_RETRY		10
 #define BNXT_FW_SLOT_RESET_RETRY	4
 
+struct bnxt_aux_dev {
+	struct auxiliary_device aux_dev;
+	struct bnxt_en_dev *edev;
+	int id;
+};
+
 enum board_idx {
 	BCM57301,
 	BCM57302,
@@ -1852,6 +1859,7 @@  struct bnxt {
 #define BNXT_CHIP_P4_PLUS(bp)			\
 	(BNXT_CHIP_P4(bp) || BNXT_CHIP_P5(bp))
 
+	struct bnxt_aux_dev	*aux_dev;
 	struct bnxt_en_dev	*edev;
 
 	struct bnxt_napi	**bnapi;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index 2e54bf4fc7a7..899d9adfd35f 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -25,6 +25,8 @@ 
 #include "bnxt_hwrm.h"
 #include "bnxt_ulp.h"
 
+static DEFINE_IDA(bnxt_aux_dev_ids);
+
 static int bnxt_register_dev(struct bnxt_en_dev *edev, unsigned int ulp_id,
 			     struct bnxt_ulp_ops *ulp_ops, void *handle)
 {
@@ -32,7 +34,6 @@  static int bnxt_register_dev(struct bnxt_en_dev *edev, unsigned int ulp_id,
 	struct bnxt *bp = netdev_priv(dev);
 	struct bnxt_ulp *ulp;
 
-	ASSERT_RTNL();
 	if (ulp_id >= BNXT_MAX_ULP)
 		return -EINVAL;
 
@@ -50,7 +51,7 @@  static int bnxt_register_dev(struct bnxt_en_dev *edev, unsigned int ulp_id,
 			return -ENOMEM;
 	}
 
-	atomic_set(&ulp->ref_count, 0);
+	atomic_set(&ulp->ref_count, 1);
 	ulp->handle = handle;
 	rcu_assign_pointer(ulp->ulp_ops, ulp_ops);
 
@@ -69,10 +70,11 @@  static int bnxt_unregister_dev(struct bnxt_en_dev *edev, unsigned int ulp_id)
 	struct bnxt_ulp *ulp;
 	int i = 0;
 
-	ASSERT_RTNL();
 	if (ulp_id >= BNXT_MAX_ULP)
 		return -EINVAL;
 
+	edev->flags |= BNXT_EN_FLAG_ULP_STOPPED;
+
 	ulp = &edev->ulp_tbl[ulp_id];
 	if (!rcu_access_pointer(ulp->ulp_ops)) {
 		netdev_err(bp->dev, "ulp id %d not registered\n", ulp_id);
@@ -126,7 +128,6 @@  static int bnxt_req_msix_vecs(struct bnxt_en_dev *edev, unsigned int ulp_id,
 	int total_vecs;
 	int rc = 0;
 
-	ASSERT_RTNL();
 	if (ulp_id != BNXT_ROCE_ULP)
 		return -EINVAL;
 
@@ -149,10 +150,12 @@  static int bnxt_req_msix_vecs(struct bnxt_en_dev *edev, unsigned int ulp_id,
 		max_idx = min_t(int, bp->total_irqs, max_cp_rings);
 		idx = max_idx - avail_msix;
 	}
+
 	edev->ulp_tbl[ulp_id].msix_base = idx;
 	edev->ulp_tbl[ulp_id].msix_requested = avail_msix;
 	hw_resc = &bp->hw_resc;
 	total_vecs = idx + avail_msix;
+	rtnl_lock();
 	if (bp->total_irqs < total_vecs ||
 	    (BNXT_NEW_RM(bp) && hw_resc->resv_irqs < total_vecs)) {
 		if (netif_running(dev)) {
@@ -162,6 +165,7 @@  static int bnxt_req_msix_vecs(struct bnxt_en_dev *edev, unsigned int ulp_id,
 			rc = bnxt_reserve_rings(bp, true);
 		}
 	}
+	rtnl_unlock();
 	if (rc) {
 		edev->ulp_tbl[ulp_id].msix_requested = 0;
 		return -EAGAIN;
@@ -184,7 +188,6 @@  static int bnxt_free_msix_vecs(struct bnxt_en_dev *edev, unsigned int ulp_id)
 	struct net_device *dev = edev->net;
 	struct bnxt *bp = netdev_priv(dev);
 
-	ASSERT_RTNL();
 	if (ulp_id != BNXT_ROCE_ULP)
 		return -EINVAL;
 
@@ -193,10 +196,13 @@  static int bnxt_free_msix_vecs(struct bnxt_en_dev *edev, unsigned int ulp_id)
 
 	edev->ulp_tbl[ulp_id].msix_requested = 0;
 	edev->flags &= ~BNXT_EN_FLAG_MSIX_REQUESTED;
+	rtnl_lock();
 	if (netif_running(dev) && !(edev->flags & BNXT_EN_FLAG_ULP_STOPPED)) {
 		bnxt_close_nic(bp, true, false);
 		bnxt_open_nic(bp, true, false);
 	}
+	rtnl_unlock();
+
 	return 0;
 }
 
@@ -347,25 +353,6 @@  void bnxt_ulp_sriov_cfg(struct bnxt *bp, int num_vfs)
 	}
 }
 
-void bnxt_ulp_shutdown(struct bnxt *bp)
-{
-	struct bnxt_en_dev *edev = bp->edev;
-	struct bnxt_ulp_ops *ops;
-	int i;
-
-	if (!edev)
-		return;
-
-	for (i = 0; i < BNXT_MAX_ULP; i++) {
-		struct bnxt_ulp *ulp = &edev->ulp_tbl[i];
-
-		ops = rtnl_dereference(ulp->ulp_ops);
-		if (!ops || !ops->ulp_shutdown)
-			continue;
-		ops->ulp_shutdown(ulp->handle);
-	}
-}
-
 void bnxt_ulp_irq_stop(struct bnxt *bp)
 {
 	struct bnxt_en_dev *edev = bp->edev;
@@ -475,6 +462,135 @@  static const struct bnxt_en_ops bnxt_en_ops_tbl = {
 	.bnxt_register_fw_async_events	= bnxt_register_async_events,
 };
 
+void bnxt_aux_dev_free(struct bnxt *bp)
+{
+	kfree(bp->aux_dev);
+	bp->aux_dev = NULL;
+}
+
+static struct bnxt_aux_dev *bnxt_aux_dev_alloc(struct bnxt *bp)
+{
+	return kzalloc(sizeof(struct bnxt_aux_dev), GFP_KERNEL);
+}
+
+void bnxt_rdma_aux_device_uninit(struct bnxt *bp)
+{
+	struct bnxt_aux_dev *bnxt_adev;
+	struct auxiliary_device *adev;
+
+	/* Skip if no auxiliary device init was done. */
+	if (!(bp->flags & BNXT_FLAG_ROCE_CAP))
+		return;
+
+	bnxt_adev = bp->aux_dev;
+	adev = &bnxt_adev->aux_dev;
+	auxiliary_device_delete(adev);
+	auxiliary_device_uninit(adev);
+	if (bnxt_adev->id >= 0)
+		ida_free(&bnxt_aux_dev_ids, bnxt_adev->id);
+}
+
+static void bnxt_aux_dev_release(struct device *dev)
+{
+	struct bnxt_aux_dev *bnxt_adev =
+		container_of(dev, struct bnxt_aux_dev, aux_dev.dev);
+	struct bnxt *bp = netdev_priv(bnxt_adev->edev->net);
+
+	bnxt_adev->edev->en_ops = NULL;
+	kfree(bnxt_adev->edev);
+	bnxt_adev->edev = NULL;
+	bp->edev = NULL;
+}
+
+static void bnxt_set_edev_info(struct bnxt_en_dev *edev, struct bnxt *bp)
+{
+	edev->en_ops = &bnxt_en_ops_tbl;
+	edev->net = bp->dev;
+	edev->pdev = bp->pdev;
+	edev->l2_db_size = bp->db_size;
+	edev->l2_db_size_nc = bp->db_size;
+
+	if (bp->flags & BNXT_FLAG_ROCEV1_CAP)
+		edev->flags |= BNXT_EN_FLAG_ROCEV1_CAP;
+	if (bp->flags & BNXT_FLAG_ROCEV2_CAP)
+		edev->flags |= BNXT_EN_FLAG_ROCEV2_CAP;
+}
+
+static int bnxt_rdma_aux_device_add(struct bnxt *bp)
+{
+	struct bnxt_aux_dev *bnxt_adev = bp->aux_dev;
+	struct bnxt_en_dev *edev = bnxt_adev->edev;
+	struct auxiliary_device *aux_dev;
+	int ret;
+
+	edev = kzalloc(sizeof(*edev), GFP_KERNEL);
+	if (!edev)
+		return -ENOMEM;
+
+	aux_dev = &bnxt_adev->aux_dev;
+	aux_dev->id = bnxt_adev->id;
+	aux_dev->name = "rdma";
+	aux_dev->dev.parent = &bp->pdev->dev;
+	aux_dev->dev.release = bnxt_aux_dev_release;
+
+	bnxt_adev->edev = edev;
+	bp->edev = edev;
+	bnxt_set_edev_info(edev, bp);
+
+	ret = auxiliary_device_init(aux_dev);
+	if (ret)
+		goto free_edev;
+
+	ret = auxiliary_device_add(aux_dev);
+	if (ret)
+		goto aux_dev_uninit;
+
+	return 0;
+aux_dev_uninit:
+	auxiliary_device_uninit(aux_dev);
+free_edev:
+	kfree(edev);
+	bp->edev = NULL;
+
+	return ret;
+}
+
+void bnxt_rdma_aux_device_init(struct bnxt *bp)
+{
+	int rc;
+
+	if (!(bp->flags & BNXT_FLAG_ROCE_CAP))
+		return;
+
+	bp->aux_dev = bnxt_aux_dev_alloc(bp);
+	if (!bp->aux_dev)
+		goto skip_ida_init;
+
+	bp->aux_dev->id = ida_alloc(&bnxt_aux_dev_ids, GFP_KERNEL);
+	if (bp->aux_dev->id < 0) {
+		netdev_warn(bp->dev,
+			    "ida alloc failed for ROCE auxiliary device\n");
+		goto skip_aux_init;
+	}
+
+	/* If aux bus init fails, continue with netdev init. */
+	rc = bnxt_rdma_aux_device_add(bp);
+	if (rc) {
+		netdev_warn(bp->dev,
+			    "Failed to add auxiliary device for ROCE\n");
+		goto aux_add_failed;
+	}
+	return;
+
+aux_add_failed:
+	ida_free(&bnxt_aux_dev_ids, bp->aux_dev->id);
+	bp->aux_dev->id = -1;
+skip_aux_init:
+	bnxt_aux_dev_free(bp);
+skip_ida_init:
+	bp->flags &= ~BNXT_FLAG_ROCE_CAP;
+}
+
 struct bnxt_en_dev *bnxt_ulp_probe(struct net_device *dev)
 {
 	struct bnxt *bp = netdev_priv(dev);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
index 42b50abc3e91..47c7131e5549 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
@@ -102,10 +102,12 @@  int bnxt_get_ulp_stat_ctxs(struct bnxt *bp);
 void bnxt_ulp_stop(struct bnxt *bp);
 void bnxt_ulp_start(struct bnxt *bp, int err);
 void bnxt_ulp_sriov_cfg(struct bnxt *bp, int num_vfs);
-void bnxt_ulp_shutdown(struct bnxt *bp);
 void bnxt_ulp_irq_stop(struct bnxt *bp);
 void bnxt_ulp_irq_restart(struct bnxt *bp, int err);
 void bnxt_ulp_async_events(struct bnxt *bp, struct hwrm_async_event_cmpl *cmpl);
+void bnxt_rdma_aux_device_uninit(struct bnxt *bp);
+void bnxt_rdma_aux_device_init(struct bnxt *bp);
+void bnxt_aux_dev_free(struct bnxt *bp);
 struct bnxt_en_dev *bnxt_ulp_probe(struct net_device *dev);
 
 #endif