[v4,05/17] iommufd: Separate kernel-managed HWPT alloc/destroy/abort functions
Commit Message
From: Nicolin Chen <nicolinc@nvidia.com>
As one of the previous commits mentioned, a user-managed HWPT will have
some different attributes/members. It'd be more clear by having separate
allocators. Since the existing iommufd_hw_pagetable_alloc() serves well
kernel-managed HWPTs, apply some minimal updates to mark it as a kernel-
managed HWPT allocator.
Also, add a pair of function pointers (abort and destroy) in the struct,
to separate different cleanup routines. Then rename the existing cleanup
functions to iommufd_kernel_managed_hwpt_destroy/abort() linked to the
HWPT in the allocator.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
---
drivers/iommu/iommufd/hw_pagetable.c | 34 ++++++++++++++++++++-----
drivers/iommu/iommufd/iommufd_private.h | 3 +++
2 files changed, 31 insertions(+), 6 deletions(-)
Comments
> From: Liu, Yi L <yi.l.liu@intel.com>
> Sent: Thursday, September 21, 2023 3:51 PM
>
> -void iommufd_hw_pagetable_destroy(struct iommufd_object *obj)
> +static void iommufd_kernel_managed_hwpt_destroy(struct
> iommufd_object *obj)
'_managed_' could be removed. 'kernel_hwpt' and 'user_hwpt' are
clear enough.
ditto for all other related functions.
On Thu, Sep 21, 2023 at 12:51:26AM -0700, Yi Liu wrote:
> From: Nicolin Chen <nicolinc@nvidia.com>
>
> As one of the previous commits mentioned, a user-managed HWPT will have
> some different attributes/members. It'd be more clear by having separate
> allocators. Since the existing iommufd_hw_pagetable_alloc() serves well
> kernel-managed HWPTs, apply some minimal updates to mark it as a kernel-
> managed HWPT allocator.
>
> Also, add a pair of function pointers (abort and destroy) in the struct,
> to separate different cleanup routines. Then rename the existing cleanup
> functions to iommufd_kernel_managed_hwpt_destroy/abort() linked to the
> HWPT in the allocator.
>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> ---
> drivers/iommu/iommufd/hw_pagetable.c | 34 ++++++++++++++++++++-----
> drivers/iommu/iommufd/iommufd_private.h | 3 +++
> 2 files changed, 31 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
> index 554a9c3d740f..1cc7178121d1 100644
> --- a/drivers/iommu/iommufd/hw_pagetable.c
> +++ b/drivers/iommu/iommufd/hw_pagetable.c
> @@ -8,7 +8,7 @@
> #include "../iommu-priv.h"
> #include "iommufd_private.h"
>
> -void iommufd_hw_pagetable_destroy(struct iommufd_object *obj)
> +static void iommufd_kernel_managed_hwpt_destroy(struct iommufd_object *obj)
> {
> struct iommufd_hw_pagetable *hwpt =
> container_of(obj, struct iommufd_hw_pagetable, obj);
> @@ -27,7 +27,12 @@ void iommufd_hw_pagetable_destroy(struct iommufd_object *obj)
> refcount_dec(&hwpt->ioas->obj.users);
> }
>
> -void iommufd_hw_pagetable_abort(struct iommufd_object *obj)
> +void iommufd_hw_pagetable_destroy(struct iommufd_object *obj)
> +{
> + container_of(obj, struct iommufd_hw_pagetable, obj)->destroy(obj);
> +}
> +
> +static void iommufd_kernel_managed_hwpt_abort(struct iommufd_object *obj)
> {
> struct iommufd_hw_pagetable *hwpt =
> container_of(obj, struct iommufd_hw_pagetable, obj);
> @@ -42,6 +47,11 @@ void iommufd_hw_pagetable_abort(struct iommufd_object *obj)
> iommufd_hw_pagetable_destroy(obj);
> }
>
> +void iommufd_hw_pagetable_abort(struct iommufd_object *obj)
> +{
> + container_of(obj, struct iommufd_hw_pagetable, obj)->abort(obj);
> +}
> +
> int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt)
> {
> if (hwpt->enforce_cache_coherency)
> @@ -57,7 +67,7 @@ int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt)
> }
>
> /**
> - * iommufd_hw_pagetable_alloc() - Get an iommu_domain for a device
> + * iommufd_hw_pagetable_alloc() - Get a kernel-managed iommu_domain for a device
> * @ictx: iommufd context
> * @ioas: IOAS to associate the domain with
> * @idev: Device to get an iommu_domain for
> @@ -66,9 +76,9 @@ int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt)
> * @user_data: Optional user_data pointer
> * @immediate_attach: True if idev should be attached to the hwpt
> *
> - * Allocate a new iommu_domain and return it as a hw_pagetable. The HWPT
> - * will be linked to the given ioas and upon return the underlying iommu_domain
> - * is fully popoulated.
> + * Allocate a new iommu_domain (must be IOMMU_DOMAIN_UNMANAGED) and return it as
> + * a kernel-managed hw_pagetable. The HWPT will be linked to the given ioas and
> + * upon return the underlying iommu_domain is fully popoulated.
> *
> * The caller must hold the ioas->mutex until after
> * iommufd_object_abort_and_destroy() or iommufd_object_finalize() is called on
> @@ -103,6 +113,8 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
> /* Pairs with iommufd_hw_pagetable_destroy() */
> refcount_inc(&ioas->obj.users);
> hwpt->ioas = ioas;
> + hwpt->abort = iommufd_kernel_managed_hwpt_abort;
> + hwpt->destroy = iommufd_kernel_managed_hwpt_destroy;
>
> if (ops->domain_alloc_user) {
> hwpt->domain = ops->domain_alloc_user(idev->dev, flags,
> @@ -121,6 +133,16 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
> }
> }
>
> + if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_UNMANAGED)) {
> + rc = -EINVAL;
> + goto out_abort;
> + }
> + /* Driver is buggy by mixing user-managed op in kernel-managed ops */
> + if (WARN_ON_ONCE(hwpt->domain->ops->cache_invalidate_user)) {
> + rc = -EINVAL;
> + goto out_abort;
> + }
> +
> /*
> * Set the coherency mode before we do iopt_table_add_domain() as some
> * iommus have a per-PTE bit that controls it and need to decide before
> diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
> index 1d3b1a74e854..3e89c3d530f3 100644
> --- a/drivers/iommu/iommufd/iommufd_private.h
> +++ b/drivers/iommu/iommufd/iommufd_private.h
> @@ -234,6 +234,9 @@ struct iommufd_hw_pagetable {
> struct iommufd_object obj;
> struct iommu_domain *domain;
>
> + void (*abort)(struct iommufd_object *obj);
> + void (*destroy)(struct iommufd_object *obj);
> +
> union {
> struct { /* kernel-managed */
> struct iommufd_ioas *ioas;
I think if you are doing this you are trying too hard to share the
struct.. Defaintely want to avoid function pointers in general, and
function pointers in a writable struct in particular.
An if of some sort in the two functions would be clearer
Jason
On Tue, Oct 10, 2023 at 03:49:32PM -0300, Jason Gunthorpe wrote:
> > diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
> > index 1d3b1a74e854..3e89c3d530f3 100644
> > --- a/drivers/iommu/iommufd/iommufd_private.h
> > +++ b/drivers/iommu/iommufd/iommufd_private.h
> > @@ -234,6 +234,9 @@ struct iommufd_hw_pagetable {
> > struct iommufd_object obj;
> > struct iommu_domain *domain;
> >
> > + void (*abort)(struct iommufd_object *obj);
> > + void (*destroy)(struct iommufd_object *obj);
> > +
> > union {
> > struct { /* kernel-managed */
> > struct iommufd_ioas *ioas;
>
> I think if you are doing this you are trying too hard to share the
> struct.. Defaintely want to avoid function pointers in general, and
> function pointers in a writable struct in particular.
I looked at this for a while and I do still have the feeling that
probably two structs and even two type IDs is probably a cleaner
design.
Like this:
// Or maybe use obj.type ?
enum iommufd_hw_pagetable_type {
IOMMUFD_HWPT_PAGING,
IOMMUFD_HWPT_NESTED,
};
struct iommufd_hw_pagetable_common {
struct iommufd_object obj;
struct iommu_domain *domain;
enum iommufd_hw_pagetable_type type;
};
struct iommufd_hw_pagetable {
struct iommufd_hw_pagetable_common common;
struct iommufd_ioas *ioas;
bool auto_domain : 1;
bool enforce_cache_coherency : 1;
bool msi_cookie : 1;
/* Head at iommufd_ioas::hwpt_list */
struct list_head hwpt_item;
};
struct iommufd_hw_pagetable_nested {
struct iommufd_hw_pagetable_common common;
// ??
};
I poked at it in an editor for a bit and it was looking OK but
requires breaking up a bunch of functions then I ran out of time
Also, we probably should feed enforce_cache_coherency through the
alloc_hwpt uapi and not try to autodetect it..
Jason
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Friday, October 13, 2023 3:10 AM
>
> Also, we probably should feed enforce_cache_coherency through the
> alloc_hwpt uapi and not try to autodetect it..
>
In the past we had a long discussion about this with the conclusion
that user opt is the ideal model but it's fine to stay with autodetect
and existing vfio/kvm contract for coherency detection until we
see a real demand for user opt.
Is there anything new which changes your mind to have user opt now?
On Fri, Oct 13, 2023 at 07:13:34AM +0000, Tian, Kevin wrote:
> > From: Jason Gunthorpe <jgg@nvidia.com>
> > Sent: Friday, October 13, 2023 3:10 AM
> >
> > Also, we probably should feed enforce_cache_coherency through the
> > alloc_hwpt uapi and not try to autodetect it..
> >
>
> In the past we had a long discussion about this with the conclusion
> that user opt is the ideal model but it's fine to stay with autodetect
> and existing vfio/kvm contract for coherency detection until we
> see a real demand for user opt.
>
> Is there anything new which changes your mind to have user opt now?
I guess, I was just looking at the complexity it brings to keep that
working.
Jason
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Friday, October 13, 2023 10:06 PM
>
> On Fri, Oct 13, 2023 at 07:13:34AM +0000, Tian, Kevin wrote:
> > > From: Jason Gunthorpe <jgg@nvidia.com>
> > > Sent: Friday, October 13, 2023 3:10 AM
> > >
> > > Also, we probably should feed enforce_cache_coherency through the
> > > alloc_hwpt uapi and not try to autodetect it..
> > >
> >
> > In the past we had a long discussion about this with the conclusion
> > that user opt is the ideal model but it's fine to stay with autodetect
> > and existing vfio/kvm contract for coherency detection until we
> > see a real demand for user opt.
> >
> > Is there anything new which changes your mind to have user opt now?
>
> I guess, I was just looking at the complexity it brings to keep that
> working.
>
vfio_file_enforced_coherent() currently just looks at device_iommu_capable()
and can be called before attach, assuming an autodetect model.
moving away from autodetect would need a new contract which I hesitate
to bother with at this point.
On Mon, Oct 16, 2023 at 08:26:25AM +0000, Tian, Kevin wrote:
> > From: Jason Gunthorpe <jgg@nvidia.com>
> > Sent: Friday, October 13, 2023 10:06 PM
> >
> > On Fri, Oct 13, 2023 at 07:13:34AM +0000, Tian, Kevin wrote:
> > > > From: Jason Gunthorpe <jgg@nvidia.com>
> > > > Sent: Friday, October 13, 2023 3:10 AM
> > > >
> > > > Also, we probably should feed enforce_cache_coherency through the
> > > > alloc_hwpt uapi and not try to autodetect it..
> > > >
> > >
> > > In the past we had a long discussion about this with the conclusion
> > > that user opt is the ideal model but it's fine to stay with autodetect
> > > and existing vfio/kvm contract for coherency detection until we
> > > see a real demand for user opt.
> > >
> > > Is there anything new which changes your mind to have user opt now?
> >
> > I guess, I was just looking at the complexity it brings to keep that
> > working.
> >
>
> vfio_file_enforced_coherent() currently just looks at device_iommu_capable()
> and can be called before attach, assuming an autodetect model.
>
> moving away from autodetect would need a new contract which I hesitate
> to bother with at this point.
I think that is fine, we are not enabling new behaviors where qemu can
directly control the wbinvd support. If we want to do that it can be
done more directly.
If the vmm sees a coherent device and does not turn on enforced
coherency then it will get a broken wbinvd and be sad. Since this is
all in the userspace vt-d iommu driver, it can just do the right thing
anyhow.
Jason
@@ -8,7 +8,7 @@
#include "../iommu-priv.h"
#include "iommufd_private.h"
-void iommufd_hw_pagetable_destroy(struct iommufd_object *obj)
+static void iommufd_kernel_managed_hwpt_destroy(struct iommufd_object *obj)
{
struct iommufd_hw_pagetable *hwpt =
container_of(obj, struct iommufd_hw_pagetable, obj);
@@ -27,7 +27,12 @@ void iommufd_hw_pagetable_destroy(struct iommufd_object *obj)
refcount_dec(&hwpt->ioas->obj.users);
}
-void iommufd_hw_pagetable_abort(struct iommufd_object *obj)
+void iommufd_hw_pagetable_destroy(struct iommufd_object *obj)
+{
+ container_of(obj, struct iommufd_hw_pagetable, obj)->destroy(obj);
+}
+
+static void iommufd_kernel_managed_hwpt_abort(struct iommufd_object *obj)
{
struct iommufd_hw_pagetable *hwpt =
container_of(obj, struct iommufd_hw_pagetable, obj);
@@ -42,6 +47,11 @@ void iommufd_hw_pagetable_abort(struct iommufd_object *obj)
iommufd_hw_pagetable_destroy(obj);
}
+void iommufd_hw_pagetable_abort(struct iommufd_object *obj)
+{
+ container_of(obj, struct iommufd_hw_pagetable, obj)->abort(obj);
+}
+
int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt)
{
if (hwpt->enforce_cache_coherency)
@@ -57,7 +67,7 @@ int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt)
}
/**
- * iommufd_hw_pagetable_alloc() - Get an iommu_domain for a device
+ * iommufd_hw_pagetable_alloc() - Get a kernel-managed iommu_domain for a device
* @ictx: iommufd context
* @ioas: IOAS to associate the domain with
* @idev: Device to get an iommu_domain for
@@ -66,9 +76,9 @@ int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt)
* @user_data: Optional user_data pointer
* @immediate_attach: True if idev should be attached to the hwpt
*
- * Allocate a new iommu_domain and return it as a hw_pagetable. The HWPT
- * will be linked to the given ioas and upon return the underlying iommu_domain
- * is fully popoulated.
+ * Allocate a new iommu_domain (must be IOMMU_DOMAIN_UNMANAGED) and return it as
+ * a kernel-managed hw_pagetable. The HWPT will be linked to the given ioas and
+ * upon return the underlying iommu_domain is fully popoulated.
*
* The caller must hold the ioas->mutex until after
* iommufd_object_abort_and_destroy() or iommufd_object_finalize() is called on
@@ -103,6 +113,8 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
/* Pairs with iommufd_hw_pagetable_destroy() */
refcount_inc(&ioas->obj.users);
hwpt->ioas = ioas;
+ hwpt->abort = iommufd_kernel_managed_hwpt_abort;
+ hwpt->destroy = iommufd_kernel_managed_hwpt_destroy;
if (ops->domain_alloc_user) {
hwpt->domain = ops->domain_alloc_user(idev->dev, flags,
@@ -121,6 +133,16 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
}
}
+ if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_UNMANAGED)) {
+ rc = -EINVAL;
+ goto out_abort;
+ }
+ /* Driver is buggy by mixing user-managed op in kernel-managed ops */
+ if (WARN_ON_ONCE(hwpt->domain->ops->cache_invalidate_user)) {
+ rc = -EINVAL;
+ goto out_abort;
+ }
+
/*
* Set the coherency mode before we do iopt_table_add_domain() as some
* iommus have a per-PTE bit that controls it and need to decide before
@@ -234,6 +234,9 @@ struct iommufd_hw_pagetable {
struct iommufd_object obj;
struct iommu_domain *domain;
+ void (*abort)(struct iommufd_object *obj);
+ void (*destroy)(struct iommufd_object *obj);
+
union {
struct { /* kernel-managed */
struct iommufd_ioas *ioas;