[v2,04/11] iommufd: Pass parent hwpt and user_data to iommufd_hw_pagetable_alloc()

Message ID 20230511143844.22693-5-yi.l.liu@intel.com
State New
Headers
Series iommufd: Add nesting infrastructure |

Commit Message

Yi Liu May 11, 2023, 2:38 p.m. UTC
  Nested translation has stage-1 and stage-2 page tables. A stage-1 page
table is managed by user space, and it needs to work with a stage-2 page
table, which is a parent hwpt for the stage-1 hwpt.

iommu core already supports accepting parent iommu_domain and user_data
to allocate an iommu_domain. This makes iommufd_hw_pagetable_alloc() to
accept the parent hwpt and user_data, and relays them to iommu core, to
prepare for supporting hw_pagetable allocation with user_data.

Also, add a parent pointer in struct iommufd_hw_pagetable for taking and
releasing its refcount.

Co-developed-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
---
 drivers/iommu/iommufd/device.c          |  2 +-
 drivers/iommu/iommufd/hw_pagetable.c    | 34 ++++++++++++++++++++++---
 drivers/iommu/iommufd/iommufd_private.h |  7 ++++-
 3 files changed, 38 insertions(+), 5 deletions(-)
  

Comments

Tian, Kevin May 19, 2023, 9:06 a.m. UTC | #1
> From: Liu, Yi L <yi.l.liu@intel.com>
> Sent: Thursday, May 11, 2023 10:39 PM
> @@ -61,6 +63,8 @@ int iommufd_hw_pagetable_enforce_cc(struct
> iommufd_hw_pagetable *hwpt)
>   * @ictx: iommufd context
>   * @ioas: IOAS to associate the domain with
>   * @idev: Device to get an iommu_domain for
> + * @parent: Optional parent HWPT to associate with the domain with

remove the trailing "the domain with"

> @@ -73,14 +77,22 @@ int iommufd_hw_pagetable_enforce_cc(struct
> iommufd_hw_pagetable *hwpt)
>   */
>  struct iommufd_hw_pagetable *
>  iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct
> iommufd_ioas *ioas,
> -			   struct iommufd_device *idev, bool
> immediate_attach)
> +			   struct iommufd_device *idev,
> +			   struct iommufd_hw_pagetable *parent,
> +			   union iommu_domain_user_data *user_data,
> +			   bool immediate_attach)
>  {
>  	const struct iommu_ops *ops = dev_iommu_ops(idev->dev);
> +	struct iommu_domain *parent_domain = NULL;
>  	struct iommufd_hw_pagetable *hwpt;
> +	bool type_unmanaged, type_nested;
>  	int rc;
> 
>  	lockdep_assert_held(&ioas->mutex);
> 
> +	if ((user_data || parent) && !ops->domain_alloc_user)
> +		return ERR_PTR(-EOPNOTSUPP);

Do we allow specifying parent w/o user_data?

> @@ -99,6 +117,15 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx
> *ictx, struct iommufd_ioas *ioas,
>  		goto out_abort;
>  	}
> 
> +	/* It must be either NESTED or UNMANAGED, depending on
> parent_domain */
> +       type_nested = hwpt->domain->type == IOMMU_DOMAIN_NESTED;
> +       type_unmanaged = hwpt->domain->type ==
> IOMMU_DOMAIN_UNMANAGED;

no need of one-time used variables. Just put the conditions directly
in WARN_ON.

> +       if (WARN_ON((parent_domain && !type_nested) ||
> +                   (!parent_domain && !type_unmanaged))) {
> +		rc = -EINVAL;
> +		goto out_abort;
> +	}
> +

probably just WARN_ON_ONCE() to mark that driver has problem?
  
Tian, Kevin May 19, 2023, 9:34 a.m. UTC | #2
> From: Liu, Yi L <yi.l.liu@intel.com>
> Sent: Thursday, May 11, 2023 10:39 PM
> @@ -89,9 +101,15 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx
> *ictx, struct iommufd_ioas *ioas,
>  	/* Pairs with iommufd_hw_pagetable_destroy() */
>  	refcount_inc(&ioas->obj.users);
>  	hwpt->ioas = ioas;

presumably a user hwpt doesn't need store ioas?
  
Nicolin Chen May 19, 2023, 7:09 p.m. UTC | #3
On Fri, May 19, 2023 at 09:06:20AM +0000, Tian, Kevin wrote:

> > @@ -73,14 +77,22 @@ int iommufd_hw_pagetable_enforce_cc(struct
> > iommufd_hw_pagetable *hwpt)
> >   */
> >  struct iommufd_hw_pagetable *
> >  iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct
> > iommufd_ioas *ioas,
> > -                        struct iommufd_device *idev, bool
> > immediate_attach)
> > +                        struct iommufd_device *idev,
> > +                        struct iommufd_hw_pagetable *parent,
> > +                        union iommu_domain_user_data *user_data,
> > +                        bool immediate_attach)
> >  {
> >       const struct iommu_ops *ops = dev_iommu_ops(idev->dev);
> > +     struct iommu_domain *parent_domain = NULL;
> >       struct iommufd_hw_pagetable *hwpt;
> > +     bool type_unmanaged, type_nested;
> >       int rc;
> >
> >       lockdep_assert_held(&ioas->mutex);
> >
> > +     if ((user_data || parent) && !ops->domain_alloc_user)
> > +             return ERR_PTR(-EOPNOTSUPP);
> 
> Do we allow specifying parent w/o user_data?

I don't think so. Perhaps we should do a double check:

+	if (!!user_data ^ !!parent)
+		return ERR_PTR(-EINVAL);
+	if (user_data && !ops->domain_alloc_user)
+		return ERR_PTR(-EOPNOTSUPP);

> > @@ -99,6 +117,15 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx
> > *ictx, struct iommufd_ioas *ioas,
> >               goto out_abort;
> >       }
> >
> > +     /* It must be either NESTED or UNMANAGED, depending on
> > parent_domain */
> > +       type_nested = hwpt->domain->type == IOMMU_DOMAIN_NESTED;
> > +       type_unmanaged = hwpt->domain->type ==
> > IOMMU_DOMAIN_UNMANAGED;
> 
> no need of one-time used variables. Just put the conditions directly
> in WARN_ON.

It is to improve the readability. Otherwise, we'd have:

	if (WARN_ON((parent_domain &&
		     hwpt->domain->type != IOMMU_DOMAIN_NESTED) ||
		    (!parent_domain &&
		     hwpt->domain->type != IOMMU_DOMAIN_UNMANAGED)))

> > +       if (WARN_ON((parent_domain && !type_nested) ||
> > +                   (!parent_domain && !type_unmanaged))) {
> > +             rc = -EINVAL;
> > +             goto out_abort;
> > +     }
> > +
> 
> probably just WARN_ON_ONCE() to mark that driver has problem?

Yea. I think we could do that.

Thanks
Nic
  
Nicolin Chen May 19, 2023, 7:12 p.m. UTC | #4
On Fri, May 19, 2023 at 09:34:38AM +0000, Tian, Kevin wrote:

> > From: Liu, Yi L <yi.l.liu@intel.com>
> > Sent: Thursday, May 11, 2023 10:39 PM
> > @@ -89,9 +101,15 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx
> > *ictx, struct iommufd_ioas *ioas,
> >       /* Pairs with iommufd_hw_pagetable_destroy() */
> >       refcount_inc(&ioas->obj.users);
> >       hwpt->ioas = ioas;
> 
> presumably a user hwpt doesn't need store ioas?

hwpt->ioas has the refcount and mutex that are used by a user
hwpt too throughout the hwpt functions.

Thanks
Nic
  
Tian, Kevin May 24, 2023, 5:11 a.m. UTC | #5
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Saturday, May 20, 2023 3:09 AM
> 
> On Fri, May 19, 2023 at 09:06:20AM +0000, Tian, Kevin wrote:
> 
> > > @@ -73,14 +77,22 @@ int iommufd_hw_pagetable_enforce_cc(struct
> > > iommufd_hw_pagetable *hwpt)
> > >   */
> > >  struct iommufd_hw_pagetable *
> > >  iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct
> > > iommufd_ioas *ioas,
> > > -                        struct iommufd_device *idev, bool
> > > immediate_attach)
> > > +                        struct iommufd_device *idev,
> > > +                        struct iommufd_hw_pagetable *parent,
> > > +                        union iommu_domain_user_data *user_data,
> > > +                        bool immediate_attach)
> > >  {
> > >       const struct iommu_ops *ops = dev_iommu_ops(idev->dev);
> > > +     struct iommu_domain *parent_domain = NULL;
> > >       struct iommufd_hw_pagetable *hwpt;
> > > +     bool type_unmanaged, type_nested;
> > >       int rc;
> > >
> > >       lockdep_assert_held(&ioas->mutex);
> > >
> > > +     if ((user_data || parent) && !ops->domain_alloc_user)
> > > +             return ERR_PTR(-EOPNOTSUPP);
> >
> > Do we allow specifying parent w/o user_data?
> 
> I don't think so. Perhaps we should do a double check:
> 
> +	if (!!user_data ^ !!parent)
> +		return ERR_PTR(-EINVAL);

I think we allow creating a s2 hwpt with user_data so it
should be:

	if (parent && !user_data)
		return ERR_PTR(-INVAL);

> > > @@ -99,6 +117,15 @@ iommufd_hw_pagetable_alloc(struct
> iommufd_ctx
> > > *ictx, struct iommufd_ioas *ioas,
> > >               goto out_abort;
> > >       }
> > >
> > > +     /* It must be either NESTED or UNMANAGED, depending on
> > > parent_domain */
> > > +       type_nested = hwpt->domain->type == IOMMU_DOMAIN_NESTED;
> > > +       type_unmanaged = hwpt->domain->type ==
> > > IOMMU_DOMAIN_UNMANAGED;
> >
> > no need of one-time used variables. Just put the conditions directly
> > in WARN_ON.
> 
> It is to improve the readability. Otherwise, we'd have:
> 
> 	if (WARN_ON((parent_domain &&
> 		     hwpt->domain->type != IOMMU_DOMAIN_NESTED) ||
> 		    (!parent_domain &&
> 		     hwpt->domain->type !=
> IOMMU_DOMAIN_UNMANAGED)))

IMHO this is already very clear w/o defining additional variables. 😊
  
Nicolin Chen May 24, 2023, 5:31 a.m. UTC | #6
On Wed, May 24, 2023 at 05:11:43AM +0000, Tian, Kevin wrote:
 
> > From: Nicolin Chen <nicolinc@nvidia.com>
> > Sent: Saturday, May 20, 2023 3:09 AM
> >
> > On Fri, May 19, 2023 at 09:06:20AM +0000, Tian, Kevin wrote:
> >
> > > > @@ -73,14 +77,22 @@ int iommufd_hw_pagetable_enforce_cc(struct
> > > > iommufd_hw_pagetable *hwpt)
> > > >   */
> > > >  struct iommufd_hw_pagetable *
> > > >  iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct
> > > > iommufd_ioas *ioas,
> > > > -                        struct iommufd_device *idev, bool
> > > > immediate_attach)
> > > > +                        struct iommufd_device *idev,
> > > > +                        struct iommufd_hw_pagetable *parent,
> > > > +                        union iommu_domain_user_data *user_data,
> > > > +                        bool immediate_attach)
> > > >  {
> > > >       const struct iommu_ops *ops = dev_iommu_ops(idev->dev);
> > > > +     struct iommu_domain *parent_domain = NULL;
> > > >       struct iommufd_hw_pagetable *hwpt;
> > > > +     bool type_unmanaged, type_nested;
> > > >       int rc;
> > > >
> > > >       lockdep_assert_held(&ioas->mutex);
> > > >
> > > > +     if ((user_data || parent) && !ops->domain_alloc_user)
> > > > +             return ERR_PTR(-EOPNOTSUPP);
> > >
> > > Do we allow specifying parent w/o user_data?
> >
> > I don't think so. Perhaps we should do a double check:
> >
> > +     if (!!user_data ^ !!parent)
> > +             return ERR_PTR(-EINVAL);
> 
> I think we allow creating a s2 hwpt with user_data so it
> should be:
> 
>         if (parent && !user_data)
>                 return ERR_PTR(-INVAL);

Oh, yes. I forgot about that :)

> > > > @@ -99,6 +117,15 @@ iommufd_hw_pagetable_alloc(struct
> > iommufd_ctx
> > > > *ictx, struct iommufd_ioas *ioas,
> > > >               goto out_abort;
> > > >       }
> > > >
> > > > +     /* It must be either NESTED or UNMANAGED, depending on
> > > > parent_domain */
> > > > +       type_nested = hwpt->domain->type == IOMMU_DOMAIN_NESTED;
> > > > +       type_unmanaged = hwpt->domain->type ==
> > > > IOMMU_DOMAIN_UNMANAGED;
> > >
> > > no need of one-time used variables. Just put the conditions directly
> > > in WARN_ON.
> >
> > It is to improve the readability. Otherwise, we'd have:
> >
> >       if (WARN_ON((parent_domain &&
> >                    hwpt->domain->type != IOMMU_DOMAIN_NESTED) ||
> >                   (!parent_domain &&
> >                    hwpt->domain->type !=
> > IOMMU_DOMAIN_UNMANAGED)))
> 
> IMHO this is already very clear w/o defining additional variables. 😊

Okay. I think we can revert this back and drop the two type_*.

Thanks
Nic
  

Patch

diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 4541d785bfd8..58c4deb3cb5d 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -584,7 +584,7 @@  iommufd_device_auto_get_domain(struct iommufd_device *idev,
 	}
 
 	hwpt = iommufd_hw_pagetable_alloc(idev->ictx, ioas, idev,
-					  immediate_attach);
+					  NULL, NULL, immediate_attach);
 	if (IS_ERR(hwpt)) {
 		destroy_hwpt = ERR_CAST(hwpt);
 		goto out_unlock;
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index b6323ad9c32d..ec9b140939ce 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -24,6 +24,8 @@  void iommufd_hw_pagetable_destroy(struct iommufd_object *obj)
 	if (hwpt->domain)
 		iommu_domain_free(hwpt->domain);
 
+	if (hwpt->parent)
+		refcount_dec(&hwpt->parent->obj.users);
 	refcount_dec(&hwpt->ioas->obj.users);
 }
 
@@ -61,6 +63,8 @@  int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt)
  * @ictx: iommufd context
  * @ioas: IOAS to associate the domain with
  * @idev: Device to get an iommu_domain for
+ * @parent: Optional parent HWPT to associate with the domain with
+ * @user_data: Optional user_data pointer
  * @immediate_attach: True if idev should be attached to the hwpt
  *
  * Allocate a new iommu_domain and return it as a hw_pagetable. The HWPT
@@ -73,14 +77,22 @@  int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt)
  */
 struct iommufd_hw_pagetable *
 iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
-			   struct iommufd_device *idev, bool immediate_attach)
+			   struct iommufd_device *idev,
+			   struct iommufd_hw_pagetable *parent,
+			   union iommu_domain_user_data *user_data,
+			   bool immediate_attach)
 {
 	const struct iommu_ops *ops = dev_iommu_ops(idev->dev);
+	struct iommu_domain *parent_domain = NULL;
 	struct iommufd_hw_pagetable *hwpt;
+	bool type_unmanaged, type_nested;
 	int rc;
 
 	lockdep_assert_held(&ioas->mutex);
 
+	if ((user_data || parent) && !ops->domain_alloc_user)
+		return ERR_PTR(-EOPNOTSUPP);
+
 	hwpt = iommufd_object_alloc(ictx, hwpt, IOMMUFD_OBJ_HW_PAGETABLE);
 	if (IS_ERR(hwpt))
 		return hwpt;
@@ -89,9 +101,15 @@  iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
 	/* Pairs with iommufd_hw_pagetable_destroy() */
 	refcount_inc(&ioas->obj.users);
 	hwpt->ioas = ioas;
+	if (parent) {
+		hwpt->parent = parent;
+		parent_domain = parent->domain;
+		refcount_inc(&parent->obj.users);
+	}
 
 	if (ops->domain_alloc_user)
-		hwpt->domain = ops->domain_alloc_user(idev->dev, NULL, NULL);
+		hwpt->domain = ops->domain_alloc_user(idev->dev,
+						      parent_domain, user_data);
 	else
 		hwpt->domain = iommu_domain_alloc(idev->dev->bus);
 	if (!hwpt->domain) {
@@ -99,6 +117,15 @@  iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
 		goto out_abort;
 	}
 
+	/* It must be either NESTED or UNMANAGED, depending on parent_domain */
+       type_nested = hwpt->domain->type == IOMMU_DOMAIN_NESTED;
+       type_unmanaged = hwpt->domain->type == IOMMU_DOMAIN_UNMANAGED;
+       if (WARN_ON((parent_domain && !type_nested) ||
+                   (!parent_domain && !type_unmanaged))) {
+		rc = -EINVAL;
+		goto out_abort;
+	}
+
 	/*
 	 * Set the coherency mode before we do iopt_table_add_domain() as some
 	 * iommus have a per-PTE bit that controls it and need to decide before
@@ -160,7 +187,8 @@  int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
 	}
 
 	mutex_lock(&ioas->mutex);
-	hwpt = iommufd_hw_pagetable_alloc(ucmd->ictx, ioas, idev, false);
+	hwpt = iommufd_hw_pagetable_alloc(ucmd->ictx, ioas, idev,
+					  NULL, NULL, false);
 	if (IS_ERR(hwpt)) {
 		rc = PTR_ERR(hwpt);
 		goto out_unlock;
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 69d6bb61d387..9fe807e0aed6 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -8,6 +8,7 @@ 
 #include <linux/xarray.h>
 #include <linux/refcount.h>
 #include <linux/uaccess.h>
+#include <linux/iommu.h>
 
 struct iommu_domain;
 struct iommu_group;
@@ -243,6 +244,7 @@  int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd);
  */
 struct iommufd_hw_pagetable {
 	struct iommufd_object obj;
+	struct iommufd_hw_pagetable *parent;
 	struct iommufd_ioas *ioas;
 	struct iommu_domain *domain;
 	bool auto_domain : 1;
@@ -254,7 +256,10 @@  struct iommufd_hw_pagetable {
 
 struct iommufd_hw_pagetable *
 iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
-			   struct iommufd_device *idev, bool immediate_attach);
+			   struct iommufd_device *idev,
+			   struct iommufd_hw_pagetable *parent,
+			   union iommu_domain_user_data *user_data,
+			   bool immediate_attach);
 int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt);
 int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
 				struct iommufd_device *idev);