[vhost,v2,8/8] vdpa/mlx5: Add mkey leak detection
Commit Message
Track allocated MRs in a list and show a warning when leaks are detected
on device free or reset.
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Reviewed-by: Gal Pressman <gal@nvidia.com>
---
drivers/vdpa/mlx5/core/mlx5_vdpa.h | 2 ++
drivers/vdpa/mlx5/core/mr.c | 23 +++++++++++++++++++++++
drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 ++
3 files changed, 27 insertions(+)
Comments
On Tue, Dec 5, 2023 at 11:47 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
>
> Track allocated mrs in a list and show warning when leaks are detected
> on device free or reset.
>
> Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
> Reviewed-by: Gal Pressman <gal@nvidia.com>
Acked-by: Eugenio Pérez <eperezma@redhat.com>
> ---
> drivers/vdpa/mlx5/core/mlx5_vdpa.h | 2 ++
> drivers/vdpa/mlx5/core/mr.c | 23 +++++++++++++++++++++++
> drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 ++
> 3 files changed, 27 insertions(+)
>
> diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> index 1a0d27b6e09a..50aac8fe57ef 100644
> --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> @@ -37,6 +37,7 @@ struct mlx5_vdpa_mr {
> bool user_mr;
>
> refcount_t refcount;
> + struct list_head mr_list;
> };
>
> struct mlx5_vdpa_resources {
> @@ -95,6 +96,7 @@ struct mlx5_vdpa_dev {
> u32 generation;
>
> struct mlx5_vdpa_mr *mr[MLX5_VDPA_NUM_AS];
> + struct list_head mr_list_head;
> /* serialize mr access */
> struct mutex mr_mtx;
> struct mlx5_control_vq cvq;
> diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
> index c7dc8914354a..4758914ccf86 100644
> --- a/drivers/vdpa/mlx5/core/mr.c
> +++ b/drivers/vdpa/mlx5/core/mr.c
> @@ -508,6 +508,8 @@ static void _mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_
>
> vhost_iotlb_free(mr->iotlb);
>
> + list_del(&mr->mr_list);
> +
> kfree(mr);
> }
>
> @@ -560,12 +562,31 @@ void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev,
> mutex_unlock(&mvdev->mr_mtx);
> }
>
> +static void mlx5_vdpa_show_mr_leaks(struct mlx5_vdpa_dev *mvdev)
> +{
> + struct mlx5_vdpa_mr *mr;
> +
> + mutex_lock(&mvdev->mr_mtx);
> +
> + list_for_each_entry(mr, &mvdev->mr_list_head, mr_list) {
> +
> + mlx5_vdpa_warn(mvdev, "mkey still alive after resource delete: "
> + "mr: %p, mkey: 0x%x, refcount: %u\n",
> + mr, mr->mkey, refcount_read(&mr->refcount));
> + }
> +
> + mutex_unlock(&mvdev->mr_mtx);
> +
> +}
> +
> void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev)
> {
> for (int i = 0; i < MLX5_VDPA_NUM_AS; i++)
> mlx5_vdpa_update_mr(mvdev, NULL, i);
>
> prune_iotlb(mvdev->cvq.iotlb);
> +
> + mlx5_vdpa_show_mr_leaks(mvdev);
> }
>
> static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
> @@ -592,6 +613,8 @@ static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
> if (err)
> goto err_iotlb;
>
> + list_add_tail(&mr->mr_list, &mvdev->mr_list_head);
> +
> return 0;
>
> err_iotlb:
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 133cbb66dcfe..778821bab7d9 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -3722,6 +3722,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
> if (err)
> goto err_mpfs;
>
> + INIT_LIST_HEAD(&mvdev->mr_list_head);
> +
> if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
> err = mlx5_vdpa_create_dma_mr(mvdev);
> if (err)
> --
> 2.42.0
>
@@ -37,6 +37,7 @@ struct mlx5_vdpa_mr {
bool user_mr;
refcount_t refcount;
+ struct list_head mr_list;
};
struct mlx5_vdpa_resources {
@@ -95,6 +96,7 @@ struct mlx5_vdpa_dev {
u32 generation;
struct mlx5_vdpa_mr *mr[MLX5_VDPA_NUM_AS];
+ struct list_head mr_list_head;
/* serialize mr access */
struct mutex mr_mtx;
struct mlx5_control_vq cvq;
@@ -508,6 +508,8 @@ static void _mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_
vhost_iotlb_free(mr->iotlb);
+ list_del(&mr->mr_list);
+
kfree(mr);
}
@@ -560,12 +562,31 @@ void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev,
mutex_unlock(&mvdev->mr_mtx);
}
+static void mlx5_vdpa_show_mr_leaks(struct mlx5_vdpa_dev *mvdev)
+{
+ struct mlx5_vdpa_mr *mr;
+
+ mutex_lock(&mvdev->mr_mtx);
+
+ list_for_each_entry(mr, &mvdev->mr_list_head, mr_list) {
+
+ mlx5_vdpa_warn(mvdev, "mkey still alive after resource delete: "
+ "mr: %p, mkey: 0x%x, refcount: %u\n",
+ mr, mr->mkey, refcount_read(&mr->refcount));
+ }
+
+ mutex_unlock(&mvdev->mr_mtx);
+
+}
+
void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev)
{
for (int i = 0; i < MLX5_VDPA_NUM_AS; i++)
mlx5_vdpa_update_mr(mvdev, NULL, i);
prune_iotlb(mvdev->cvq.iotlb);
+
+ mlx5_vdpa_show_mr_leaks(mvdev);
}
static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
@@ -592,6 +613,8 @@ static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
if (err)
goto err_iotlb;
+ list_add_tail(&mr->mr_list, &mvdev->mr_list_head);
+
return 0;
err_iotlb:
@@ -3722,6 +3722,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
if (err)
goto err_mpfs;
+ INIT_LIST_HEAD(&mvdev->mr_list_head);
+
if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
err = mlx5_vdpa_create_dma_mr(mvdev);
if (err)