[v2,04/11] iommufd: Pass parent hwpt and user_data to iommufd_hw_pagetable_alloc()
Commit Message
Nested translation has stage-1 and stage-2 page tables. A stage-1 page
table is managed by user space, and it needs to work with a stage-2 page
table, which is a parent hwpt for the stage-1 hwpt.
iommu core already supports accepting parent iommu_domain and user_data
to allocate an iommu_domain. This makes iommufd_hw_pagetable_alloc() to
accept the parent hwpt and user_data, and relays them to iommu core, to
prepare for supporting hw_pagetable allocation with user_data.
Also, add a parent pointer in struct iommufd_hw_pagetable for taking and
releasing its refcount.
Co-developed-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
---
drivers/iommu/iommufd/device.c | 2 +-
drivers/iommu/iommufd/hw_pagetable.c | 34 ++++++++++++++++++++++---
drivers/iommu/iommufd/iommufd_private.h | 7 ++++-
3 files changed, 38 insertions(+), 5 deletions(-)
Comments
> From: Liu, Yi L <yi.l.liu@intel.com>
> Sent: Thursday, May 11, 2023 10:39 PM
> @@ -61,6 +63,8 @@ int iommufd_hw_pagetable_enforce_cc(struct
> iommufd_hw_pagetable *hwpt)
> * @ictx: iommufd context
> * @ioas: IOAS to associate the domain with
> * @idev: Device to get an iommu_domain for
> + * @parent: Optional parent HWPT to associate with the domain with
remove the trailing "the domain with"
> @@ -73,14 +77,22 @@ int iommufd_hw_pagetable_enforce_cc(struct
> iommufd_hw_pagetable *hwpt)
> */
> struct iommufd_hw_pagetable *
> iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct
> iommufd_ioas *ioas,
> - struct iommufd_device *idev, bool
> immediate_attach)
> + struct iommufd_device *idev,
> + struct iommufd_hw_pagetable *parent,
> + union iommu_domain_user_data *user_data,
> + bool immediate_attach)
> {
> const struct iommu_ops *ops = dev_iommu_ops(idev->dev);
> + struct iommu_domain *parent_domain = NULL;
> struct iommufd_hw_pagetable *hwpt;
> + bool type_unmanaged, type_nested;
> int rc;
>
> lockdep_assert_held(&ioas->mutex);
>
> + if ((user_data || parent) && !ops->domain_alloc_user)
> + return ERR_PTR(-EOPNOTSUPP);
Do we allow specifying parent w/o user_data?
> @@ -99,6 +117,15 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx
> *ictx, struct iommufd_ioas *ioas,
> goto out_abort;
> }
>
> + /* It must be either NESTED or UNMANAGED, depending on
> parent_domain */
> + type_nested = hwpt->domain->type == IOMMU_DOMAIN_NESTED;
> + type_unmanaged = hwpt->domain->type ==
> IOMMU_DOMAIN_UNMANAGED;
no need of one-time used variables. Just put the conditions directly
in WARN_ON.
> + if (WARN_ON((parent_domain && !type_nested) ||
> + (!parent_domain && !type_unmanaged))) {
> + rc = -EINVAL;
> + goto out_abort;
> + }
> +
probably just WARN_ON_ONCE() to mark that driver has problem?
> From: Liu, Yi L <yi.l.liu@intel.com>
> Sent: Thursday, May 11, 2023 10:39 PM
> @@ -89,9 +101,15 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx
> *ictx, struct iommufd_ioas *ioas,
> /* Pairs with iommufd_hw_pagetable_destroy() */
> refcount_inc(&ioas->obj.users);
> hwpt->ioas = ioas;
presumably a user hwpt doesn't need store ioas?
On Fri, May 19, 2023 at 09:06:20AM +0000, Tian, Kevin wrote:
> > @@ -73,14 +77,22 @@ int iommufd_hw_pagetable_enforce_cc(struct
> > iommufd_hw_pagetable *hwpt)
> > */
> > struct iommufd_hw_pagetable *
> > iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct
> > iommufd_ioas *ioas,
> > - struct iommufd_device *idev, bool
> > immediate_attach)
> > + struct iommufd_device *idev,
> > + struct iommufd_hw_pagetable *parent,
> > + union iommu_domain_user_data *user_data,
> > + bool immediate_attach)
> > {
> > const struct iommu_ops *ops = dev_iommu_ops(idev->dev);
> > + struct iommu_domain *parent_domain = NULL;
> > struct iommufd_hw_pagetable *hwpt;
> > + bool type_unmanaged, type_nested;
> > int rc;
> >
> > lockdep_assert_held(&ioas->mutex);
> >
> > + if ((user_data || parent) && !ops->domain_alloc_user)
> > + return ERR_PTR(-EOPNOTSUPP);
>
> Do we allow specifying parent w/o user_data?
I don't think so. Perhaps we should do a double check:
+ if (!!user_data ^ !!parent)
+ return ERR_PTR(-EINVAL);
+ if (user_data && !ops->domain_alloc_user)
+ return ERR_PTR(-EOPNOTSUPP);
> > @@ -99,6 +117,15 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx
> > *ictx, struct iommufd_ioas *ioas,
> > goto out_abort;
> > }
> >
> > + /* It must be either NESTED or UNMANAGED, depending on
> > parent_domain */
> > + type_nested = hwpt->domain->type == IOMMU_DOMAIN_NESTED;
> > + type_unmanaged = hwpt->domain->type ==
> > IOMMU_DOMAIN_UNMANAGED;
>
> no need of one-time used variables. Just put the conditions directly
> in WARN_ON.
It is to improve the readability. Otherwise, we'd have:
if (WARN_ON((parent_domain &&
hwpt->domain->type != IOMMU_DOMAIN_NESTED) ||
(!parent_domain &&
hwpt->domain->type != IOMMU_DOMAIN_UNMANAGED)))
> > + if (WARN_ON((parent_domain && !type_nested) ||
> > + (!parent_domain && !type_unmanaged))) {
> > + rc = -EINVAL;
> > + goto out_abort;
> > + }
> > +
>
> probably just WARN_ON_ONCE() to mark that driver has problem?
Yea. I think we could do that.
Thanks
Nic
On Fri, May 19, 2023 at 09:34:38AM +0000, Tian, Kevin wrote:
> > From: Liu, Yi L <yi.l.liu@intel.com>
> > Sent: Thursday, May 11, 2023 10:39 PM
> > @@ -89,9 +101,15 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx
> > *ictx, struct iommufd_ioas *ioas,
> > /* Pairs with iommufd_hw_pagetable_destroy() */
> > refcount_inc(&ioas->obj.users);
> > hwpt->ioas = ioas;
>
> presumably a user hwpt doesn't need store ioas?
hwpt->ioas has the refcount and mutex that are used by a user
hwpt too throughout the hwpt functions.
Thanks
Nic
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Saturday, May 20, 2023 3:09 AM
>
> On Fri, May 19, 2023 at 09:06:20AM +0000, Tian, Kevin wrote:
>
> > > @@ -73,14 +77,22 @@ int iommufd_hw_pagetable_enforce_cc(struct
> > > iommufd_hw_pagetable *hwpt)
> > > */
> > > struct iommufd_hw_pagetable *
> > > iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct
> > > iommufd_ioas *ioas,
> > > - struct iommufd_device *idev, bool
> > > immediate_attach)
> > > + struct iommufd_device *idev,
> > > + struct iommufd_hw_pagetable *parent,
> > > + union iommu_domain_user_data *user_data,
> > > + bool immediate_attach)
> > > {
> > > const struct iommu_ops *ops = dev_iommu_ops(idev->dev);
> > > + struct iommu_domain *parent_domain = NULL;
> > > struct iommufd_hw_pagetable *hwpt;
> > > + bool type_unmanaged, type_nested;
> > > int rc;
> > >
> > > lockdep_assert_held(&ioas->mutex);
> > >
> > > + if ((user_data || parent) && !ops->domain_alloc_user)
> > > + return ERR_PTR(-EOPNOTSUPP);
> >
> > Do we allow specifying parent w/o user_data?
>
> I don't think so. Perhaps we should do a double check:
>
> + if (!!user_data ^ !!parent)
> + return ERR_PTR(-EINVAL);
I think we allow creating a s2 hwpt with user_data so it
should be:
if (parent && !user_data)
return ERR_PTR(-INVAL);
> > > @@ -99,6 +117,15 @@ iommufd_hw_pagetable_alloc(struct
> iommufd_ctx
> > > *ictx, struct iommufd_ioas *ioas,
> > > goto out_abort;
> > > }
> > >
> > > + /* It must be either NESTED or UNMANAGED, depending on
> > > parent_domain */
> > > + type_nested = hwpt->domain->type == IOMMU_DOMAIN_NESTED;
> > > + type_unmanaged = hwpt->domain->type ==
> > > IOMMU_DOMAIN_UNMANAGED;
> >
> > no need of one-time used variables. Just put the conditions directly
> > in WARN_ON.
>
> It is to improve the readability. Otherwise, we'd have:
>
> if (WARN_ON((parent_domain &&
> hwpt->domain->type != IOMMU_DOMAIN_NESTED) ||
> (!parent_domain &&
> hwpt->domain->type !=
> IOMMU_DOMAIN_UNMANAGED)))
IMHO this is already very clear w/o defining additional variables. 😊
On Wed, May 24, 2023 at 05:11:43AM +0000, Tian, Kevin wrote:
> > From: Nicolin Chen <nicolinc@nvidia.com>
> > Sent: Saturday, May 20, 2023 3:09 AM
> >
> > On Fri, May 19, 2023 at 09:06:20AM +0000, Tian, Kevin wrote:
> >
> > > > @@ -73,14 +77,22 @@ int iommufd_hw_pagetable_enforce_cc(struct
> > > > iommufd_hw_pagetable *hwpt)
> > > > */
> > > > struct iommufd_hw_pagetable *
> > > > iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct
> > > > iommufd_ioas *ioas,
> > > > - struct iommufd_device *idev, bool
> > > > immediate_attach)
> > > > + struct iommufd_device *idev,
> > > > + struct iommufd_hw_pagetable *parent,
> > > > + union iommu_domain_user_data *user_data,
> > > > + bool immediate_attach)
> > > > {
> > > > const struct iommu_ops *ops = dev_iommu_ops(idev->dev);
> > > > + struct iommu_domain *parent_domain = NULL;
> > > > struct iommufd_hw_pagetable *hwpt;
> > > > + bool type_unmanaged, type_nested;
> > > > int rc;
> > > >
> > > > lockdep_assert_held(&ioas->mutex);
> > > >
> > > > + if ((user_data || parent) && !ops->domain_alloc_user)
> > > > + return ERR_PTR(-EOPNOTSUPP);
> > >
> > > Do we allow specifying parent w/o user_data?
> >
> > I don't think so. Perhaps we should do a double check:
> >
> > + if (!!user_data ^ !!parent)
> > + return ERR_PTR(-EINVAL);
>
> I think we allow creating a s2 hwpt with user_data so it
> should be:
>
> if (parent && !user_data)
> return ERR_PTR(-INVAL);
Oh, yes. I forgot about that :)
> > > > @@ -99,6 +117,15 @@ iommufd_hw_pagetable_alloc(struct
> > iommufd_ctx
> > > > *ictx, struct iommufd_ioas *ioas,
> > > > goto out_abort;
> > > > }
> > > >
> > > > + /* It must be either NESTED or UNMANAGED, depending on
> > > > parent_domain */
> > > > + type_nested = hwpt->domain->type == IOMMU_DOMAIN_NESTED;
> > > > + type_unmanaged = hwpt->domain->type ==
> > > > IOMMU_DOMAIN_UNMANAGED;
> > >
> > > no need of one-time used variables. Just put the conditions directly
> > > in WARN_ON.
> >
> > It is to improve the readability. Otherwise, we'd have:
> >
> > if (WARN_ON((parent_domain &&
> > hwpt->domain->type != IOMMU_DOMAIN_NESTED) ||
> > (!parent_domain &&
> > hwpt->domain->type !=
> > IOMMU_DOMAIN_UNMANAGED)))
>
> IMHO this is already very clear w/o defining additional variables. 😊
Okay. I think we can revert this back and drop the two type_*.
Thanks
Nic
@@ -584,7 +584,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev,
}
hwpt = iommufd_hw_pagetable_alloc(idev->ictx, ioas, idev,
- immediate_attach);
+ NULL, NULL, immediate_attach);
if (IS_ERR(hwpt)) {
destroy_hwpt = ERR_CAST(hwpt);
goto out_unlock;
@@ -24,6 +24,8 @@ void iommufd_hw_pagetable_destroy(struct iommufd_object *obj)
if (hwpt->domain)
iommu_domain_free(hwpt->domain);
+ if (hwpt->parent)
+ refcount_dec(&hwpt->parent->obj.users);
refcount_dec(&hwpt->ioas->obj.users);
}
@@ -61,6 +63,8 @@ int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt)
* @ictx: iommufd context
* @ioas: IOAS to associate the domain with
* @idev: Device to get an iommu_domain for
+ * @parent: Optional parent HWPT to associate with the domain with
+ * @user_data: Optional user_data pointer
* @immediate_attach: True if idev should be attached to the hwpt
*
* Allocate a new iommu_domain and return it as a hw_pagetable. The HWPT
@@ -73,14 +77,22 @@ int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt)
*/
struct iommufd_hw_pagetable *
iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
- struct iommufd_device *idev, bool immediate_attach)
+ struct iommufd_device *idev,
+ struct iommufd_hw_pagetable *parent,
+ union iommu_domain_user_data *user_data,
+ bool immediate_attach)
{
const struct iommu_ops *ops = dev_iommu_ops(idev->dev);
+ struct iommu_domain *parent_domain = NULL;
struct iommufd_hw_pagetable *hwpt;
+ bool type_unmanaged, type_nested;
int rc;
lockdep_assert_held(&ioas->mutex);
+ if ((user_data || parent) && !ops->domain_alloc_user)
+ return ERR_PTR(-EOPNOTSUPP);
+
hwpt = iommufd_object_alloc(ictx, hwpt, IOMMUFD_OBJ_HW_PAGETABLE);
if (IS_ERR(hwpt))
return hwpt;
@@ -89,9 +101,15 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
/* Pairs with iommufd_hw_pagetable_destroy() */
refcount_inc(&ioas->obj.users);
hwpt->ioas = ioas;
+ if (parent) {
+ hwpt->parent = parent;
+ parent_domain = parent->domain;
+ refcount_inc(&parent->obj.users);
+ }
if (ops->domain_alloc_user)
- hwpt->domain = ops->domain_alloc_user(idev->dev, NULL, NULL);
+ hwpt->domain = ops->domain_alloc_user(idev->dev,
+ parent_domain, user_data);
else
hwpt->domain = iommu_domain_alloc(idev->dev->bus);
if (!hwpt->domain) {
@@ -99,6 +117,15 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
goto out_abort;
}
+ /* It must be either NESTED or UNMANAGED, depending on parent_domain */
+ type_nested = hwpt->domain->type == IOMMU_DOMAIN_NESTED;
+ type_unmanaged = hwpt->domain->type == IOMMU_DOMAIN_UNMANAGED;
+ if (WARN_ON((parent_domain && !type_nested) ||
+ (!parent_domain && !type_unmanaged))) {
+ rc = -EINVAL;
+ goto out_abort;
+ }
+
/*
* Set the coherency mode before we do iopt_table_add_domain() as some
* iommus have a per-PTE bit that controls it and need to decide before
@@ -160,7 +187,8 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
}
mutex_lock(&ioas->mutex);
- hwpt = iommufd_hw_pagetable_alloc(ucmd->ictx, ioas, idev, false);
+ hwpt = iommufd_hw_pagetable_alloc(ucmd->ictx, ioas, idev,
+ NULL, NULL, false);
if (IS_ERR(hwpt)) {
rc = PTR_ERR(hwpt);
goto out_unlock;
@@ -8,6 +8,7 @@
#include <linux/xarray.h>
#include <linux/refcount.h>
#include <linux/uaccess.h>
+#include <linux/iommu.h>
struct iommu_domain;
struct iommu_group;
@@ -243,6 +244,7 @@ int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd);
*/
struct iommufd_hw_pagetable {
struct iommufd_object obj;
+ struct iommufd_hw_pagetable *parent;
struct iommufd_ioas *ioas;
struct iommu_domain *domain;
bool auto_domain : 1;
@@ -254,7 +256,10 @@ struct iommufd_hw_pagetable {
struct iommufd_hw_pagetable *
iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
- struct iommufd_device *idev, bool immediate_attach);
+ struct iommufd_device *idev,
+ struct iommufd_hw_pagetable *parent,
+ union iommu_domain_user_data *user_data,
+ bool immediate_attach);
int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt);
int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
struct iommufd_device *idev);