[v2,03/10] iommufd: Create access in vfio_iommufd_emulated_bind()

Message ID 6083ba9a3c1d92baf1f324d4748333d80d3de830.1675802050.git.nicolinc@nvidia.com
State New
Headers
Series Add IO page table replacement support |

Commit Message

Nicolin Chen Feb. 7, 2023, 9:17 p.m. UTC
  To prepare for an access->ioas replacement, move the
iommufd_access_create() call into vfio_iommufd_emulated_bind(), making it
symmetric with the __vfio_iommufd_access_destroy() call in
vfio_iommufd_emulated_unbind(). This means an access is created/destroyed
by bind()/unbind(), and vfio_iommufd_emulated_attach_ioas() only updates
the access->ioas pointer.

Since there's no longer an ioas_id input for iommufd_access_create(), add
a new helper iommufd_access_set_ioas() to set access->ioas. A "replace"
feature can later be added simply by extending iommufd_access_set_ioas().

Setting access->ioas in vfio_iommufd_emulated_attach_ioas() instead of at
creation time, however, introduces a potential race condition with the
pin_/unpin_pages() calls, which dereference access->ioas->iopt. So, add an
ioas_lock to protect it.

Note that the "refcount_dec(&access->ioas->obj.users)" line is also moved
from iommufd_access_destroy_object() to the new iommufd_access_set_ioas()
for symmetry. Without this change, the old_ioas would also lose track of
its refcount when the replace support is added.

Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
 drivers/iommu/iommufd/device.c          | 100 ++++++++++++++++++------
 drivers/iommu/iommufd/iommufd_private.h |   1 +
 drivers/iommu/iommufd/selftest.c        |   5 +-
 drivers/vfio/iommufd.c                  |  27 ++++---
 include/linux/iommufd.h                 |   3 +-
 5 files changed, 97 insertions(+), 39 deletions(-)
  

Comments

Tian, Kevin Feb. 9, 2023, 2:56 a.m. UTC | #1
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Wednesday, February 8, 2023 5:18 AM
>
> @@ -141,10 +141,19 @@ static const struct iommufd_access_ops
> vfio_user_ops = {
>  int vfio_iommufd_emulated_bind(struct vfio_device *vdev,
>  			       struct iommufd_ctx *ictx, u32 *out_device_id)
>  {
> +	struct iommufd_access *user;
> +
>  	lockdep_assert_held(&vdev->dev_set->lock);
> 
> -	vdev->iommufd_ictx = ictx;
>  	iommufd_ctx_get(ictx);
> +	user = iommufd_access_create(vdev->iommufd_ictx, &vfio_user_ops,
> vdev);
> +	if (IS_ERR(user)) {
> +		iommufd_ctx_put(vdev->iommufd_ictx);
> +		return PTR_ERR(user);
> +	}
> +	iommufd_access_set_ioas(user, 0);

this is not required since ioas has been NULL after creation.

otherwise,

Reviewed-by: Kevin Tian <kevin.tian@intel.com>
  
Nicolin Chen Feb. 9, 2023, 4:15 p.m. UTC | #2
On Thu, Feb 09, 2023 at 02:56:39AM +0000, Tian, Kevin wrote:
> External email: Use caution opening links or attachments
> 
> 
> > From: Nicolin Chen <nicolinc@nvidia.com>
> > Sent: Wednesday, February 8, 2023 5:18 AM
> >
> > @@ -141,10 +141,19 @@ static const struct iommufd_access_ops
> > vfio_user_ops = {
> >  int vfio_iommufd_emulated_bind(struct vfio_device *vdev,
> >                              struct iommufd_ctx *ictx, u32 *out_device_id)
> >  {
> > +     struct iommufd_access *user;
> > +
> >       lockdep_assert_held(&vdev->dev_set->lock);
> >
> > -     vdev->iommufd_ictx = ictx;
> >       iommufd_ctx_get(ictx);
> > +     user = iommufd_access_create(vdev->iommufd_ictx, &vfio_user_ops,
> > vdev);
> > +     if (IS_ERR(user)) {
> > +             iommufd_ctx_put(vdev->iommufd_ictx);
> > +             return PTR_ERR(user);
> > +     }
> > +     iommufd_access_set_ioas(user, 0);
> 
> this is not required since ioas has been NULL after creation.

Will drop it.

> otherwise,
> 
> Reviewed-by: Kevin Tian <kevin.tian@intel.com>

And add this too.

Thanks!
Nic
  
Eric Farman Feb. 9, 2023, 6:58 p.m. UTC | #3
On Tue, 2023-02-07 at 13:17 -0800, Nicolin Chen wrote:
...snip...
> diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c
> index 026f81a87dd7..dc9feab73db7 100644
> --- a/drivers/vfio/iommufd.c
> +++ b/drivers/vfio/iommufd.c
> @@ -141,10 +141,19 @@ static const struct iommufd_access_ops
> vfio_user_ops = {
>  int vfio_iommufd_emulated_bind(struct vfio_device *vdev,
>                                struct iommufd_ctx *ictx, u32
> *out_device_id)
>  {
> +       struct iommufd_access *user;
> +
>         lockdep_assert_held(&vdev->dev_set->lock);
>  
> -       vdev->iommufd_ictx = ictx;
>         iommufd_ctx_get(ictx);
> +       user = iommufd_access_create(vdev->iommufd_ictx,
> &vfio_user_ops, vdev);
> +       if (IS_ERR(user)) {
> +               iommufd_ctx_put(vdev->iommufd_ictx);

Matthew noticed a vfio-ccw and -ap regression that blames this patch.

Probably both the iommufd_access_create() and iommufd_ctx_put() calls
want the ictx variable itself, instead of the (uninitialized) pointer
in the vfio_device. (At least that gets -ccw and -ap working again.)

Thanks,
Eric

> +               return PTR_ERR(user);
> +       }
> +       iommufd_access_set_ioas(user, 0);
> +       vdev->iommufd_access = user;
> +       vdev->iommufd_ictx = ictx;
>         return 0;
>  }
>  EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_bind);
  
Nicolin Chen Feb. 9, 2023, 7:54 p.m. UTC | #4
On Thu, Feb 09, 2023 at 01:58:47PM -0500, Eric Farman wrote:
> External email: Use caution opening links or attachments
> 
> 
> On Tue, 2023-02-07 at 13:17 -0800, Nicolin Chen wrote:
> ...snip...
> > diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c
> > index 026f81a87dd7..dc9feab73db7 100644
> > --- a/drivers/vfio/iommufd.c
> > +++ b/drivers/vfio/iommufd.c
> > @@ -141,10 +141,19 @@ static const struct iommufd_access_ops
> > vfio_user_ops = {
> >  int vfio_iommufd_emulated_bind(struct vfio_device *vdev,
> >                                struct iommufd_ctx *ictx, u32
> > *out_device_id)
> >  {
> > +       struct iommufd_access *user;
> > +
> >         lockdep_assert_held(&vdev->dev_set->lock);
> >
> > -       vdev->iommufd_ictx = ictx;
> >         iommufd_ctx_get(ictx);
> > +       user = iommufd_access_create(vdev->iommufd_ictx,
> > &vfio_user_ops, vdev);
> > +       if (IS_ERR(user)) {
> > +               iommufd_ctx_put(vdev->iommufd_ictx);
> 
> Matthew noticed a vfio-ccw and -ap regression that blames this patch.
> 
> Probably both the iommufd_access_create() and iommufd_ctx_put() calls
> want the ictx variable itself, instead of the (uninitialized) pointer
> in the vfio_device. (At least that gets -ccw and -ap working again.)

Oops. Yes, it should be:

	iommufd_ctx_get(ictx);
	user = iommufd_access_create(ictx, &vfio_user_ops, vdev);
	if (IS_ERR(user)) {
		iommufd_ctx_put(ictx);

Will fix in v3.

Thanks!
Nic
  

Patch

diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index d81f93a321af..f4bd6f532a90 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -418,9 +418,9 @@  void iommufd_access_destroy_object(struct iommufd_object *obj)
 	struct iommufd_access *access =
 		container_of(obj, struct iommufd_access, obj);
 
-	iopt_remove_access(&access->ioas->iopt, access);
+	iommufd_access_set_ioas(access, 0);
 	iommufd_ctx_put(access->ictx);
-	refcount_dec(&access->ioas->obj.users);
+	mutex_destroy(&access->ioas_lock);
 }
 
 /**
@@ -437,12 +437,10 @@  void iommufd_access_destroy_object(struct iommufd_object *obj)
  * The provided ops are required to use iommufd_access_pin_pages().
  */
 struct iommufd_access *
-iommufd_access_create(struct iommufd_ctx *ictx, u32 ioas_id,
+iommufd_access_create(struct iommufd_ctx *ictx,
 		      const struct iommufd_access_ops *ops, void *data)
 {
 	struct iommufd_access *access;
-	struct iommufd_object *obj;
-	int rc;
 
 	/*
 	 * There is no uAPI for the access object, but to keep things symmetric
@@ -455,33 +453,18 @@  iommufd_access_create(struct iommufd_ctx *ictx, u32 ioas_id,
 	access->data = data;
 	access->ops = ops;
 
-	obj = iommufd_get_object(ictx, ioas_id, IOMMUFD_OBJ_IOAS);
-	if (IS_ERR(obj)) {
-		rc = PTR_ERR(obj);
-		goto out_abort;
-	}
-	access->ioas = container_of(obj, struct iommufd_ioas, obj);
-	iommufd_ref_to_users(obj);
-
 	if (ops->needs_pin_pages)
 		access->iova_alignment = PAGE_SIZE;
 	else
 		access->iova_alignment = 1;
-	rc = iopt_add_access(&access->ioas->iopt, access);
-	if (rc)
-		goto out_put_ioas;
 
 	/* The calling driver is a user until iommufd_access_destroy() */
 	refcount_inc(&access->obj.users);
+	mutex_init(&access->ioas_lock);
 	access->ictx = ictx;
 	iommufd_ctx_get(ictx);
 	iommufd_object_finalize(ictx, &access->obj);
 	return access;
-out_put_ioas:
-	refcount_dec(&access->ioas->obj.users);
-out_abort:
-	iommufd_object_abort(ictx, &access->obj);
-	return ERR_PTR(rc);
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_access_create, IOMMUFD);
 
@@ -500,6 +483,50 @@  void iommufd_access_destroy(struct iommufd_access *access)
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD);
 
+int iommufd_access_set_ioas(struct iommufd_access *access, u32 ioas_id)
+{
+	struct iommufd_ioas *new_ioas = NULL, *cur_ioas;
+	struct iommufd_ctx *ictx = access->ictx;
+	struct iommufd_object *obj;
+	int rc = 0;
+
+	if (ioas_id) {
+		obj = iommufd_get_object(ictx, ioas_id, IOMMUFD_OBJ_IOAS);
+		if (IS_ERR(obj))
+			return PTR_ERR(obj);
+		new_ioas = container_of(obj, struct iommufd_ioas, obj);
+	}
+
+	mutex_lock(&access->ioas_lock);
+	cur_ioas = access->ioas;
+	if (cur_ioas == new_ioas)
+		goto out_unlock;
+
+	if (new_ioas) {
+		rc = iopt_add_access(&new_ioas->iopt, access);
+		if (rc)
+			goto out_unlock;
+		iommufd_ref_to_users(obj);
+	}
+
+	if (cur_ioas) {
+		iopt_remove_access(&cur_ioas->iopt, access);
+		refcount_dec(&cur_ioas->obj.users);
+	}
+
+	access->ioas = new_ioas;
+	mutex_unlock(&access->ioas_lock);
+
+	return 0;
+
+out_unlock:
+	mutex_unlock(&access->ioas_lock);
+	if (new_ioas)
+		iommufd_put_object(obj);
+	return rc;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_access_set_ioas, IOMMUFD);
+
 /**
  * iommufd_access_notify_unmap - Notify users of an iopt to stop using it
  * @iopt: iopt to work on
@@ -550,8 +577,8 @@  void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
 void iommufd_access_unpin_pages(struct iommufd_access *access,
 				unsigned long iova, unsigned long length)
 {
-	struct io_pagetable *iopt = &access->ioas->iopt;
 	struct iopt_area_contig_iter iter;
+	struct io_pagetable *iopt;
 	unsigned long last_iova;
 	struct iopt_area *area;
 
@@ -559,6 +586,13 @@  void iommufd_access_unpin_pages(struct iommufd_access *access,
 	    WARN_ON(check_add_overflow(iova, length - 1, &last_iova)))
 		return;
 
+	mutex_lock(&access->ioas_lock);
+	if (!access->ioas) {
+		mutex_unlock(&access->ioas_lock);
+		return;
+	}
+	iopt = &access->ioas->iopt;
+
 	down_read(&iopt->iova_rwsem);
 	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
 		iopt_area_remove_access(
@@ -568,6 +602,7 @@  void iommufd_access_unpin_pages(struct iommufd_access *access,
 				min(last_iova, iopt_area_last_iova(area))));
 	up_read(&iopt->iova_rwsem);
 	WARN_ON(!iopt_area_contig_done(&iter));
+	mutex_unlock(&access->ioas_lock);
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, IOMMUFD);
 
@@ -613,8 +648,8 @@  int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
 			     unsigned long length, struct page **out_pages,
 			     unsigned int flags)
 {
-	struct io_pagetable *iopt = &access->ioas->iopt;
 	struct iopt_area_contig_iter iter;
+	struct io_pagetable *iopt;
 	unsigned long last_iova;
 	struct iopt_area *area;
 	int rc;
@@ -629,6 +664,13 @@  int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
 	if (check_add_overflow(iova, length - 1, &last_iova))
 		return -EOVERFLOW;
 
+	mutex_lock(&access->ioas_lock);
+	if (!access->ioas) {
+		mutex_unlock(&access->ioas_lock);
+		return -ENOENT;
+	}
+	iopt = &access->ioas->iopt;
+
 	down_read(&iopt->iova_rwsem);
 	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
 		unsigned long last = min(last_iova, iopt_area_last_iova(area));
@@ -659,6 +701,7 @@  int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
 	}
 
 	up_read(&iopt->iova_rwsem);
+	mutex_unlock(&access->ioas_lock);
 	return 0;
 
 err_remove:
@@ -673,6 +716,7 @@  int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
 						  iopt_area_last_iova(area))));
 	}
 	up_read(&iopt->iova_rwsem);
+	mutex_unlock(&access->ioas_lock);
 	return rc;
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD);
@@ -692,8 +736,8 @@  EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD);
 int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
 		      void *data, size_t length, unsigned int flags)
 {
-	struct io_pagetable *iopt = &access->ioas->iopt;
 	struct iopt_area_contig_iter iter;
+	struct io_pagetable *iopt;
 	struct iopt_area *area;
 	unsigned long last_iova;
 	int rc;
@@ -703,6 +747,13 @@  int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
 	if (check_add_overflow(iova, length - 1, &last_iova))
 		return -EOVERFLOW;
 
+	mutex_lock(&access->ioas_lock);
+	if (!access->ioas) {
+		mutex_unlock(&access->ioas_lock);
+		return -ENOENT;
+	}
+	iopt = &access->ioas->iopt;
+
 	down_read(&iopt->iova_rwsem);
 	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
 		unsigned long last = min(last_iova, iopt_area_last_iova(area));
@@ -729,6 +780,7 @@  int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
 		rc = -ENOENT;
 err_out:
 	up_read(&iopt->iova_rwsem);
+	mutex_unlock(&access->ioas_lock);
 	return rc;
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, IOMMUFD);
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 222e86591f8a..2f4bb106bac6 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -261,6 +261,7 @@  struct iommufd_access {
 	struct iommufd_object obj;
 	struct iommufd_ctx *ictx;
 	struct iommufd_ioas *ioas;
+	struct mutex ioas_lock;
 	const struct iommufd_access_ops *ops;
 	void *data;
 	unsigned long iova_alignment;
diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c
index cfb5fe9a5e0e..db4011bdc8a9 100644
--- a/drivers/iommu/iommufd/selftest.c
+++ b/drivers/iommu/iommufd/selftest.c
@@ -571,7 +571,7 @@  static int iommufd_test_create_access(struct iommufd_ucmd *ucmd,
 	}
 
 	access = iommufd_access_create(
-		ucmd->ictx, ioas_id,
+		ucmd->ictx,
 		(flags & MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES) ?
 			&selftest_access_ops_pin :
 			&selftest_access_ops,
@@ -580,6 +580,9 @@  static int iommufd_test_create_access(struct iommufd_ucmd *ucmd,
 		rc = PTR_ERR(access);
 		goto out_put_fdno;
 	}
+	rc = iommufd_access_set_ioas(access, ioas_id);
+	if (rc)
+		goto out_destroy;
 	cmd->create_access.out_access_fd = fdno;
 	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
 	if (rc)
diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c
index 026f81a87dd7..dc9feab73db7 100644
--- a/drivers/vfio/iommufd.c
+++ b/drivers/vfio/iommufd.c
@@ -141,10 +141,19 @@  static const struct iommufd_access_ops vfio_user_ops = {
 int vfio_iommufd_emulated_bind(struct vfio_device *vdev,
 			       struct iommufd_ctx *ictx, u32 *out_device_id)
 {
+	struct iommufd_access *user;
+
 	lockdep_assert_held(&vdev->dev_set->lock);
 
-	vdev->iommufd_ictx = ictx;
 	iommufd_ctx_get(ictx);
+	user = iommufd_access_create(vdev->iommufd_ictx, &vfio_user_ops, vdev);
+	if (IS_ERR(user)) {
+		iommufd_ctx_put(vdev->iommufd_ictx);
+		return PTR_ERR(user);
+	}
+	iommufd_access_set_ioas(user, 0);
+	vdev->iommufd_access = user;
+	vdev->iommufd_ictx = ictx;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_bind);
@@ -168,22 +177,14 @@  EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_unbind);
 
 int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id)
 {
-	struct iommufd_access *user;
-
 	lockdep_assert_held(&vdev->dev_set->lock);
 
 	if (!vdev->iommufd_ictx)
 		return -EINVAL;
+	if (!vdev->iommufd_access)
+		return -ENOENT;
 
-	if (vdev->iommufd_access)
-		return -EBUSY;
-
-	user = iommufd_access_create(vdev->iommufd_ictx, *pt_id, &vfio_user_ops,
-				     vdev);
-	if (IS_ERR(user))
-		return PTR_ERR(user);
-	vdev->iommufd_access = user;
-	return 0;
+	return iommufd_access_set_ioas(vdev->iommufd_access, *pt_id);
 }
 EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_attach_ioas);
 
@@ -194,6 +195,6 @@  void vfio_iommufd_emulated_detach_ioas(struct vfio_device *vdev)
 	if (!vdev->iommufd_ictx || !vdev->iommufd_access)
 		return;
 
-	__vfio_iommufd_access_destroy(vdev);
+	iommufd_access_set_ioas(vdev->iommufd_access, 0);
 }
 EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_detach_ioas);
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 9672cf839687..f9bac6f9db2e 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -46,9 +46,10 @@  enum {
 };
 
 struct iommufd_access *
-iommufd_access_create(struct iommufd_ctx *ictx, u32 ioas_id,
+iommufd_access_create(struct iommufd_ctx *ictx,
 		      const struct iommufd_access_ops *ops, void *data);
 void iommufd_access_destroy(struct iommufd_access *access);
+int iommufd_access_set_ioas(struct iommufd_access *access, u32 ioas_id);
 
 void iommufd_ctx_get(struct iommufd_ctx *ictx);