On Sun, Aug 13, 2023 at 04:51:20PM +0200, Petr Pavlu wrote:
> Use a notifier to implement mlx4_dispatch_event() in preparation to
> switch mlx4_en and mlx4_ib to be an auxiliary device.
>
> A problem is that if the mlx4_interface.event callback was replaced with
> something such as mlx4_adrv.event then the implementation of
> mlx4_dispatch_event() would need to acquire a lock on a given device
> before executing this callback. That is necessary because otherwise
> there is no guarantee that the associated driver cannot get unbound when
> the callback is running. However, taking this lock is not possible
> because mlx4_dispatch_event() can be invoked from the hardirq context.
> Using an atomic notifier allows the driver to accurately record when it
> wants to receive these events and solves this problem.
>
> A handler registration is done by both mlx4_en and mlx4_ib at the end of
> their mlx4_interface.add callback. This matches the current situation
> when mlx4_add_device() would enable events for a given device
> immediately after this callback, by adding the device on the
> mlx4_priv.list.
>
> Signed-off-by: Petr Pavlu <petr.pavlu@suse.com>
> Tested-by: Leon Romanovsky <leonro@nvidia.com>
> Acked-by: Tariq Toukan <tariqt@nvidia.com>
> ---
> drivers/infiniband/hw/mlx4/main.c | 41 +++++++++++++-------
> drivers/infiniband/hw/mlx4/mlx4_ib.h | 2 +
> drivers/net/ethernet/mellanox/mlx4/en_main.c | 27 +++++++++----
> drivers/net/ethernet/mellanox/mlx4/intf.c | 24 ++++++++----
> drivers/net/ethernet/mellanox/mlx4/main.c | 2 +
> drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 +
> drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 2 +
> include/linux/mlx4/driver.h | 8 +++-
> 8 files changed, 77 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
> index 7dd70d778b6b..0761c465120b 100644
> --- a/drivers/infiniband/hw/mlx4/main.c
> +++ b/drivers/infiniband/hw/mlx4/main.c
> @@ -82,6 +82,8 @@ static const char mlx4_ib_version[] =
> static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
> static enum rdma_link_layer mlx4_ib_port_link_layer(struct ib_device *device,
> u32 port_num);
> +static int mlx4_ib_event(struct notifier_block *this, unsigned long event,
> + void *ptr);
>
> static struct workqueue_struct *wq;
>
> @@ -2836,6 +2838,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
> do_slave_init(ibdev, j, 1);
> }
> }
> +
> + /* register mlx4 core notifier */
> + ibdev->mlx_nb.notifier_call = mlx4_ib_event;
> + err = mlx4_register_event_notifier(dev, &ibdev->mlx_nb);
> + WARN(err, "failed to register mlx4 event notifier (%d)", err);
> +
> return ibdev;
>
> err_notif:
> @@ -2953,6 +2961,8 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
> int p;
> int i;
>
> + mlx4_unregister_event_notifier(dev, &ibdev->mlx_nb);
> +
> mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
> devlink_port_type_clear(mlx4_get_devlink_port(dev, i));
> ibdev->ib_active = false;
> @@ -3173,11 +3183,14 @@ void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
> }
> }
>
> -static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
> - enum mlx4_dev_event event, unsigned long param)
> +static int mlx4_ib_event(struct notifier_block *this, unsigned long event,
> + void *ptr)
> {
> + struct mlx4_ib_dev *ibdev =
> + container_of(this, struct mlx4_ib_dev, mlx_nb);
> + struct mlx4_dev *dev = ibdev->dev;
> + unsigned long param = *(unsigned long *)ptr;
You don't need this assignment here, because later, in your next patches,
you cast param again:
3227 if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
3228 eqe = (struct mlx4_eqe *)param;
3229 else
3230 p = (int) param;
so use ptr directly:
if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
eqe = param;
else
p = *(int *) param;
Thanks
@@ -82,6 +82,8 @@ static const char mlx4_ib_version[] =
static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
static enum rdma_link_layer mlx4_ib_port_link_layer(struct ib_device *device,
u32 port_num);
+static int mlx4_ib_event(struct notifier_block *this, unsigned long event,
+ void *ptr);
static struct workqueue_struct *wq;
@@ -2836,6 +2838,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
do_slave_init(ibdev, j, 1);
}
}
+
+ /* register mlx4 core notifier */
+ ibdev->mlx_nb.notifier_call = mlx4_ib_event;
+ err = mlx4_register_event_notifier(dev, &ibdev->mlx_nb);
+ WARN(err, "failed to register mlx4 event notifier (%d)", err);
+
return ibdev;
err_notif:
@@ -2953,6 +2961,8 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
int p;
int i;
+ mlx4_unregister_event_notifier(dev, &ibdev->mlx_nb);
+
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
devlink_port_type_clear(mlx4_get_devlink_port(dev, i));
ibdev->ib_active = false;
@@ -3173,11 +3183,14 @@ void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
}
}
-static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
- enum mlx4_dev_event event, unsigned long param)
+static int mlx4_ib_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
{
+ struct mlx4_ib_dev *ibdev =
+ container_of(this, struct mlx4_ib_dev, mlx_nb);
+ struct mlx4_dev *dev = ibdev->dev;
+ unsigned long param = *(unsigned long *)ptr;
struct ib_event ibev;
- struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
struct mlx4_eqe *eqe = NULL;
struct ib_event_work *ew;
int p = 0;
@@ -3187,11 +3200,11 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
(event == MLX4_DEV_EVENT_PORT_DOWN))) {
ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
if (!ew)
- return;
+ return NOTIFY_DONE;
INIT_WORK(&ew->work, handle_bonded_port_state_event);
ew->ib_dev = ibdev;
queue_work(wq, &ew->work);
- return;
+ return NOTIFY_DONE;
}
if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
@@ -3202,7 +3215,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
switch (event) {
case MLX4_DEV_EVENT_PORT_UP:
if (p > ibdev->num_ports)
- return;
+ return NOTIFY_DONE;
if (!mlx4_is_slave(dev) &&
rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
IB_LINK_LAYER_INFINIBAND) {
@@ -3217,7 +3230,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
case MLX4_DEV_EVENT_PORT_DOWN:
if (p > ibdev->num_ports)
- return;
+ return NOTIFY_DONE;
ibev.event = IB_EVENT_PORT_ERR;
break;
@@ -3230,7 +3243,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
ew = kmalloc(sizeof *ew, GFP_ATOMIC);
if (!ew)
- return;
+ return NOTIFY_DONE;
INIT_WORK(&ew->work, handle_port_mgmt_change_event);
memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
@@ -3240,7 +3253,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
queue_work(wq, &ew->work);
else
handle_port_mgmt_change_event(&ew->work);
- return;
+ return NOTIFY_DONE;
case MLX4_DEV_EVENT_SLAVE_INIT:
/* here, p is the slave id */
@@ -3256,7 +3269,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
1);
}
}
- return;
+ return NOTIFY_DONE;
case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
if (mlx4_is_master(dev)) {
@@ -3272,22 +3285,22 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
}
/* here, p is the slave id */
do_slave_init(ibdev, p, 0);
- return;
+ return NOTIFY_DONE;
default:
- return;
+ return NOTIFY_DONE;
}
- ibev.device = ibdev_ptr;
+ ibev.device = &ibdev->ib_dev;
ibev.element.port_num = mlx4_is_bonded(ibdev->dev) ? 1 : (u8)p;
ib_dispatch_event(&ibev);
+ return NOTIFY_DONE;
}
static struct mlx4_interface mlx4_ib_interface = {
.add = mlx4_ib_add,
.remove = mlx4_ib_remove,
- .event = mlx4_ib_event,
.protocol = MLX4_PROT_IB_IPV6,
.flags = MLX4_INTFF_BONDING
};
@@ -38,6 +38,7 @@
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/idr.h>
+#include <linux/notifier.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
@@ -644,6 +645,7 @@ struct mlx4_ib_dev {
spinlock_t reset_flow_resource_lock;
struct list_head qp_list;
struct mlx4_ib_diag_counters diag_counters[MLX4_DIAG_COUNTERS_TYPES];
+ struct notifier_block mlx_nb;
};
struct ib_event_work {
@@ -183,17 +183,20 @@ static void mlx4_en_get_profile(struct mlx4_en_dev *mdev)
}
}
-static void mlx4_en_event(struct mlx4_dev *dev, void *endev_ptr,
- enum mlx4_dev_event event, unsigned long port)
+static int mlx4_en_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
{
- struct mlx4_en_dev *mdev = (struct mlx4_en_dev *) endev_ptr;
+ struct mlx4_en_dev *mdev =
+ container_of(this, struct mlx4_en_dev, mlx_nb);
+ struct mlx4_dev *dev = mdev->dev;
+ unsigned long port = *(unsigned long *)ptr;
struct mlx4_en_priv *priv;
switch (event) {
case MLX4_DEV_EVENT_PORT_UP:
case MLX4_DEV_EVENT_PORT_DOWN:
if (!mdev->pndev[port])
- return;
+ return NOTIFY_DONE;
priv = netdev_priv(mdev->pndev[port]);
/* To prevent races, we poll the link state in a separate
task rather than changing it here */
@@ -211,10 +214,12 @@ static void mlx4_en_event(struct mlx4_dev *dev, void *endev_ptr,
default:
if (port < 1 || port > dev->caps.num_ports ||
!mdev->pndev[port])
- return;
- mlx4_warn(mdev, "Unhandled event %d for port %d\n", event,
+ return NOTIFY_DONE;
+ mlx4_warn(mdev, "Unhandled event %d for port %d\n", (int) event,
(int) port);
}
+
+ return NOTIFY_DONE;
}
static void mlx4_en_remove(struct mlx4_dev *dev, void *endev_ptr)
@@ -222,6 +227,8 @@ static void mlx4_en_remove(struct mlx4_dev *dev, void *endev_ptr)
struct mlx4_en_dev *mdev = endev_ptr;
int i;
+ mlx4_unregister_event_notifier(dev, &mdev->mlx_nb);
+
mutex_lock(&mdev->state_lock);
mdev->device_up = false;
mutex_unlock(&mdev->state_lock);
@@ -263,7 +270,7 @@ static void mlx4_en_activate(struct mlx4_dev *dev, void *ctx)
static void *mlx4_en_add(struct mlx4_dev *dev)
{
struct mlx4_en_dev *mdev;
- int i;
+ int err, i;
printk_once(KERN_INFO "%s", mlx4_en_version);
@@ -326,6 +333,11 @@ static void *mlx4_en_add(struct mlx4_dev *dev)
mutex_init(&mdev->state_lock);
mdev->device_up = true;
+ /* register mlx4 core notifier */
+ mdev->mlx_nb.notifier_call = mlx4_en_event;
+ err = mlx4_register_event_notifier(dev, &mdev->mlx_nb);
+ WARN(err, "failed to register mlx4 event notifier (%d)", err);
+
return mdev;
err_mr:
@@ -346,7 +358,6 @@ static void *mlx4_en_add(struct mlx4_dev *dev)
static struct mlx4_interface mlx4_en_interface = {
.add = mlx4_en_add,
.remove = mlx4_en_remove,
- .event = mlx4_en_event,
.protocol = MLX4_PROT_ETH,
.activate = mlx4_en_activate,
};
@@ -183,17 +183,27 @@ void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type,
unsigned long param)
{
struct mlx4_priv *priv = mlx4_priv(dev);
- struct mlx4_device_context *dev_ctx;
- unsigned long flags;
- spin_lock_irqsave(&priv->ctx_lock, flags);
+ atomic_notifier_call_chain(&priv->event_nh, type, &param);
+}
- list_for_each_entry(dev_ctx, &priv->ctx_list, list)
- if (dev_ctx->intf->event)
- dev_ctx->intf->event(dev, dev_ctx->context, type, param);
+int mlx4_register_event_notifier(struct mlx4_dev *dev,
+ struct notifier_block *nb)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
- spin_unlock_irqrestore(&priv->ctx_lock, flags);
+ return atomic_notifier_chain_register(&priv->event_nh, nb);
+}
+EXPORT_SYMBOL(mlx4_register_event_notifier);
+
+int mlx4_unregister_event_notifier(struct mlx4_dev *dev,
+ struct notifier_block *nb)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ return atomic_notifier_chain_unregister(&priv->event_nh, nb);
}
+EXPORT_SYMBOL(mlx4_unregister_event_notifier);
int mlx4_register_device(struct mlx4_dev *dev)
{
@@ -3378,6 +3378,8 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
INIT_LIST_HEAD(&priv->ctx_list);
spin_lock_init(&priv->ctx_lock);
+ ATOMIC_INIT_NOTIFIER_HEAD(&priv->event_nh);
+
mutex_init(&priv->port_mutex);
mutex_init(&priv->bond_mutex);
@@ -47,6 +47,7 @@
#include <linux/spinlock.h>
#include <net/devlink.h>
#include <linux/rwsem.h>
+#include <linux/notifier.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/driver.h>
@@ -878,6 +879,7 @@ struct mlx4_priv {
struct list_head dev_list;
struct list_head ctx_list;
spinlock_t ctx_lock;
+ struct atomic_notifier_head event_nh;
int pci_dev_data;
int removed;
@@ -49,6 +49,7 @@
#include <linux/ptp_clock_kernel.h>
#include <linux/irq.h>
#include <net/xdp.h>
+#include <linux/notifier.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/qp.h>
@@ -433,6 +434,7 @@ struct mlx4_en_dev {
struct ptp_clock *ptp_clock;
struct ptp_clock_info ptp_clock_info;
struct notifier_block netdev_nb;
+ struct notifier_block mlx_nb;
};
@@ -34,6 +34,7 @@
#define MLX4_DRIVER_H
#include <net/devlink.h>
+#include <linux/notifier.h>
#include <linux/mlx4/device.h>
struct mlx4_dev;
@@ -57,8 +58,6 @@ enum {
struct mlx4_interface {
void * (*add) (struct mlx4_dev *dev);
void (*remove)(struct mlx4_dev *dev, void *context);
- void (*event) (struct mlx4_dev *dev, void *context,
- enum mlx4_dev_event event, unsigned long param);
void (*activate)(struct mlx4_dev *dev, void *context);
struct list_head list;
enum mlx4_protocol protocol;
@@ -87,6 +86,11 @@ struct mlx4_port_map {
int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p);
+int mlx4_register_event_notifier(struct mlx4_dev *dev,
+ struct notifier_block *nb);
+int mlx4_unregister_event_notifier(struct mlx4_dev *dev,
+ struct notifier_block *nb);
+
struct devlink_port *mlx4_get_devlink_port(struct mlx4_dev *dev, int port);
#endif /* MLX4_DRIVER_H */