[07/12] iommufd: Add IOMMU_HWPT_INVALIDATE

Message ID 20230309080910.607396-8-yi.l.liu@intel.com
State New
Headers
Series iommufd: Add nesting infrastructure

Commit Message

Yi Liu March 9, 2023, 8:09 a.m. UTC
  In nested translation, the stage-1 page table is user-managed and used
by IOMMU hardware, so destroying mappings in the stage-1 page table should
be followed with an IOTLB invalidation.

This adds IOMMU_HWPT_INVALIDATE for IOTLB invalidation.

Co-developed-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
---
 drivers/iommu/iommufd/hw_pagetable.c    | 56 +++++++++++++++++++++++++
 drivers/iommu/iommufd/iommufd_private.h |  9 ++++
 drivers/iommu/iommufd/main.c            |  3 ++
 include/uapi/linux/iommufd.h            | 27 ++++++++++++
 4 files changed, 95 insertions(+)
  

Comments

Baolu Lu March 10, 2023, 3:15 a.m. UTC | #1
On 3/9/23 4:09 PM, Yi Liu wrote:
> In nested translation, the stage-1 page table is user-managed and used
> by IOMMU hardware, so destroying mappings in the stage-1 page table should
> be followed with an IOTLB invalidation.

s/destroying mappings/update of any present page table entry/

> This adds IOMMU_HWPT_INVALIDATE for IOTLB invalidation.
> 
> Co-developed-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> ---
>   drivers/iommu/iommufd/hw_pagetable.c    | 56 +++++++++++++++++++++++++
>   drivers/iommu/iommufd/iommufd_private.h |  9 ++++
>   drivers/iommu/iommufd/main.c            |  3 ++
>   include/uapi/linux/iommufd.h            | 27 ++++++++++++
>   4 files changed, 95 insertions(+)
> 
> diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
> index 64e7cf7142e1..67facca98de1 100644
> --- a/drivers/iommu/iommufd/hw_pagetable.c
> +++ b/drivers/iommu/iommufd/hw_pagetable.c
> @@ -284,3 +284,59 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
>   	iommufd_put_object(&idev->obj);
>   	return rc;
>   }
> +
> +/*
> + * size of page table type specific invalidate_info, indexed by
> + * enum iommu_hwpt_type.
> + */
> +static const size_t iommufd_hwpt_invalidate_info_size[] = {};
> +
> +int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd)
> +{
> +	struct iommu_hwpt_invalidate *cmd = ucmd->cmd;
> +	struct iommufd_hw_pagetable *hwpt;
> +	u64 user_ptr;
> +	u32 user_data_len, klen;
> +	int rc = 0;
> +
> +	/*
> +	 * For a user-managed HWPT, type should not be IOMMU_HWPT_TYPE_DEFAULT.
> +	 * data_len should not exceed the size of iommufd_invalidate_buffer.
> +	 */
> +	if (cmd->data_type == IOMMU_HWPT_TYPE_DEFAULT || !cmd->data_len ||
> +	    cmd->data_type >= ARRAY_SIZE(iommufd_hwpt_invalidate_info_size))

"data_len should not exceed the size of iommufd_invalidate_buffer."

How is this checked?

> +		return -EOPNOTSUPP;
> +
> +	hwpt = iommufd_get_hwpt(ucmd, cmd->hwpt_id);
> +	if (IS_ERR(hwpt))
> +		return PTR_ERR(hwpt);
> +
> +	/* Do not allow any kernel-managed hw_pagetable */
> +	if (!hwpt->parent) {
> +		rc = -EINVAL;
> +		goto out_put_hwpt;
> +	}
> +
> +	klen = iommufd_hwpt_invalidate_info_size[cmd->data_type];
> +	if (!klen) {
> +		rc = -EINVAL;
> +		goto out_put_hwpt;
> +	}
> +
> +	/*
> +	 * Copy the needed fields before reusing the ucmd buffer, this
> +	 * avoids memory allocation in this path.
> +	 */
> +	user_ptr = cmd->data_uptr;
> +	user_data_len = cmd->data_len;

Is it a valid case if "user_data_len < klen"?

> +
> +	rc = copy_struct_from_user(cmd, klen,
> +				   u64_to_user_ptr(user_ptr), user_data_len);
> +	if (rc)
> +		goto out_put_hwpt;
> +
> +	hwpt->domain->ops->cache_invalidate_user(hwpt->domain, cmd);
> +out_put_hwpt:
> +	iommufd_put_object(&hwpt->obj);
> +	return rc;
> +}
> diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
> index 182c074eecdc..d879264d1acf 100644
> --- a/drivers/iommu/iommufd/iommufd_private.h
> +++ b/drivers/iommu/iommufd/iommufd_private.h
> @@ -265,6 +265,7 @@ struct iommufd_hw_pagetable *
>   iommufd_hw_pagetable_detach(struct iommufd_device *idev);
>   void iommufd_hw_pagetable_destroy(struct iommufd_object *obj);
>   int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd);
> +int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd);
>   
>   static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx,
>   					    struct iommufd_hw_pagetable *hwpt)
> @@ -276,6 +277,14 @@ static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx,
>   		refcount_dec(&hwpt->obj.users);
>   }
>   
> +static inline struct iommufd_hw_pagetable *
> +iommufd_get_hwpt(struct iommufd_ucmd *ucmd, u32 id)
> +{
> +	return container_of(iommufd_get_object(ucmd->ictx, id,
> +					       IOMMUFD_OBJ_HW_PAGETABLE),
> +			    struct iommufd_hw_pagetable, obj);
> +}
> +
>   struct iommufd_group {
>   	struct kref ref;
>   	struct mutex lock;
> diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
> index 7ab1e2c638a1..2cf45f65b637 100644
> --- a/drivers/iommu/iommufd/main.c
> +++ b/drivers/iommu/iommufd/main.c
> @@ -263,6 +263,7 @@ union ucmd_buffer {
>   	struct iommu_destroy destroy;
>   	struct iommu_hwpt_alloc hwpt;
>   	struct iommu_hw_info info;
> +	struct iommu_hwpt_invalidate cache;
>   	struct iommu_ioas_alloc alloc;
>   	struct iommu_ioas_allow_iovas allow_iovas;
>   	struct iommu_ioas_copy ioas_copy;
> @@ -298,6 +299,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
>   		 data_uptr),
>   	IOCTL_OP(IOMMU_DEVICE_GET_HW_INFO, iommufd_device_get_hw_info,
>   		 struct iommu_hw_info, __reserved),
> +	IOCTL_OP(IOMMU_HWPT_INVALIDATE, iommufd_hwpt_invalidate,
> +		 struct iommu_hwpt_invalidate, data_uptr),
>   	IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl,
>   		 struct iommu_ioas_alloc, out_ioas_id),
>   	IOCTL_OP(IOMMU_IOAS_ALLOW_IOVAS, iommufd_ioas_allow_iovas,
> diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
> index 48781ff40a37..d0962c41f8d6 100644
> --- a/include/uapi/linux/iommufd.h
> +++ b/include/uapi/linux/iommufd.h
> @@ -47,6 +47,7 @@ enum {
>   	IOMMUFD_CMD_VFIO_IOAS,
>   	IOMMUFD_CMD_HWPT_ALLOC,
>   	IOMMUFD_CMD_DEVICE_GET_HW_INFO,
> +	IOMMUFD_CMD_HWPT_INVALIDATE,
>   };
>   
>   /**
> @@ -447,4 +448,30 @@ struct iommu_hw_info {
>   	__u32 __reserved;
>   };
>   #define IOMMU_DEVICE_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DEVICE_GET_HW_INFO)
> +
> +/**
> + * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE)
> + * @size: sizeof(struct iommu_hwpt_invalidate)
> + * @hwpt_id: HWPT ID of target hardware page table for the invalidation
> + * @data_type: One of enum iommu_hwpt_type
> + * @data_len: Length of the type specific data
> + * @data_uptr: User pointer to the type specific data
> + *
> + * Invalidate the iommu cache for user-managed page table. Modifications
> + * on user-managed page table should be followed with this operation to
> + * sync the IOTLB. This is only needed by user-managed hw_pagetables, so
> + * the @data_type should never be IOMMU_HWPT_TYPE_DEFAULT.
> + *
> + * +==============================+========================================+
> + * | @data_type                   |     Data structure in @data_uptr       |
> + * +------------------------------+----------------------------------------+
> + */
> +struct iommu_hwpt_invalidate {
> +	__u32 size;
> +	__u32 hwpt_id;
> +	__u32 data_type;
> +	__u32 data_len;
> +	__aligned_u64 data_uptr;
> +};
> +#define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE)
>   #endif

Best regards,
baolu
  
Jason Gunthorpe March 10, 2023, 5:50 p.m. UTC | #2
On Thu, Mar 09, 2023 at 12:09:05AM -0800, Yi Liu wrote:
> +int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd)
> +{
> +	struct iommu_hwpt_invalidate *cmd = ucmd->cmd;
> +	struct iommufd_hw_pagetable *hwpt;
> +	u64 user_ptr;
> +	u32 user_data_len, klen;
> +	int rc = 0;
> +
> +	/*
> +	 * For a user-managed HWPT, type should not be IOMMU_HWPT_TYPE_DEFAULT.
> +	 * data_len should not exceed the size of iommufd_invalidate_buffer.
> +	 */
> +	if (cmd->data_type == IOMMU_HWPT_TYPE_DEFAULT || !cmd->data_len ||
> +	    cmd->data_type >= ARRAY_SIZE(iommufd_hwpt_invalidate_info_size))
> +		return -EOPNOTSUPP;

This needs to do the standard check for zeros in unknown trailing data
bit. Check that alloc does it too

Jason
  
Yi Liu March 14, 2023, 4:12 a.m. UTC | #3
> From: Baolu Lu <baolu.lu@linux.intel.com>
> Sent: Friday, March 10, 2023 11:16 AM
> 
> On 3/9/23 4:09 PM, Yi Liu wrote:
> > In nested translation, the stage-1 page table is user-managed and used
> > by IOMMU hardware, so destroying mappings in the stage-1 page table
> should
> > be followed with an IOTLB invalidation.
> 
> s/destroying mappings/update of any present page table entry/

Right. Not only destroying.

> > This adds IOMMU_HWPT_INVALIDATE for IOTLB invalidation.
> >
> > Co-developed-by: Nicolin Chen <nicolinc@nvidia.com>
> > Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> > Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> > ---
> >   drivers/iommu/iommufd/hw_pagetable.c    | 56
> +++++++++++++++++++++++++
> >   drivers/iommu/iommufd/iommufd_private.h |  9 ++++
> >   drivers/iommu/iommufd/main.c            |  3 ++
> >   include/uapi/linux/iommufd.h            | 27 ++++++++++++
> >   4 files changed, 95 insertions(+)
> >
> > diff --git a/drivers/iommu/iommufd/hw_pagetable.c
> b/drivers/iommu/iommufd/hw_pagetable.c
> > index 64e7cf7142e1..67facca98de1 100644
> > --- a/drivers/iommu/iommufd/hw_pagetable.c
> > +++ b/drivers/iommu/iommufd/hw_pagetable.c
> > @@ -284,3 +284,59 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd
> *ucmd)
> >   	iommufd_put_object(&idev->obj);
> >   	return rc;
> >   }
> > +
> > +/*
> > + * size of page table type specific invalidate_info, indexed by
> > + * enum iommu_hwpt_type.
> > + */
> > +static const size_t iommufd_hwpt_invalidate_info_size[] = {};
> > +
> > +int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd)
> > +{
> > +	struct iommu_hwpt_invalidate *cmd = ucmd->cmd;
> > +	struct iommufd_hw_pagetable *hwpt;
> > +	u64 user_ptr;
> > +	u32 user_data_len, klen;
> > +	int rc = 0;
> > +
> > +	/*
> > +	 * For a user-managed HWPT, type should not be
> IOMMU_HWPT_TYPE_DEFAULT.
> > +	 * data_len should not exceed the size of
> iommufd_invalidate_buffer.
> > +	 */
> > +	if (cmd->data_type == IOMMU_HWPT_TYPE_DEFAULT || !cmd-
> >data_len ||
> > +	    cmd->data_type >=
> ARRAY_SIZE(iommufd_hwpt_invalidate_info_size))
> 
> "data_len should not exceed the size of iommufd_invalidate_buffer."
> 
> How is this checked?

Hmmm, this is a stale comment I suppose.

> 
> > +		return -EOPNOTSUPP;
> > +
> > +	hwpt = iommufd_get_hwpt(ucmd, cmd->hwpt_id);
> > +	if (IS_ERR(hwpt))
> > +		return PTR_ERR(hwpt);
> > +
> > +	/* Do not allow any kernel-managed hw_pagetable */
> > +	if (!hwpt->parent) {
> > +		rc = -EINVAL;
> > +		goto out_put_hwpt;
> > +	}
> > +
> > +	klen = iommufd_hwpt_invalidate_info_size[cmd->data_type];
> > +	if (!klen) {
> > +		rc = -EINVAL;
> > +		goto out_put_hwpt;
> > +	}
> > +
> > +	/*
> > +	 * Copy the needed fields before reusing the ucmd buffer, this
> > +	 * avoids memory allocation in this path.
> > +	 */
> > +	user_ptr = cmd->data_uptr;
> > +	user_data_len = cmd->data_len;
> 
> Is it a valid case if "user_data_len < klen"?

Yes, e.g. an old QEMU running on a new kernel that has a new field
added at the end of the data structure.

Regards,
Yi Liu
  
Yi Liu March 14, 2023, 4:14 a.m. UTC | #4
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Saturday, March 11, 2023 1:50 AM
> 
> On Thu, Mar 09, 2023 at 12:09:05AM -0800, Yi Liu wrote:
> > +int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd)
> > +{
> > +	struct iommu_hwpt_invalidate *cmd = ucmd->cmd;
> > +	struct iommufd_hw_pagetable *hwpt;
> > +	u64 user_ptr;
> > +	u32 user_data_len, klen;
> > +	int rc = 0;
> > +
> > +	/*
> > +	 * For a user-managed HWPT, type should not be
> IOMMU_HWPT_TYPE_DEFAULT.
> > +	 * data_len should not exceed the size of
> iommufd_invalidate_buffer.
> > +	 */
> > +	if (cmd->data_type == IOMMU_HWPT_TYPE_DEFAULT || !cmd-
> >data_len ||
> > +	    cmd->data_type >=
> ARRAY_SIZE(iommufd_hwpt_invalidate_info_size))
> > +		return -EOPNOTSUPP;
> 
> This needs to do the standard check for zeros in unknown trailing data
> bit. Check that alloc does it too

Yes, I will add it in both paths.

Regards,
Yi Liu
  
Yi Liu March 14, 2023, 4:18 a.m. UTC | #5
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Saturday, March 11, 2023 1:50 AM
> 
> On Thu, Mar 09, 2023 at 12:09:05AM -0800, Yi Liu wrote:
> > +int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd)
> > +{
> > +	struct iommu_hwpt_invalidate *cmd = ucmd->cmd;
> > +	struct iommufd_hw_pagetable *hwpt;
> > +	u64 user_ptr;
> > +	u32 user_data_len, klen;
> > +	int rc = 0;
> > +
> > +	/*
> > +	 * For a user-managed HWPT, type should not be
> IOMMU_HWPT_TYPE_DEFAULT.
> > +	 * data_len should not exceed the size of
> iommufd_invalidate_buffer.
> > +	 */
> > +	if (cmd->data_type == IOMMU_HWPT_TYPE_DEFAULT || !cmd-
> >data_len ||
> > +	    cmd->data_type >=
> ARRAY_SIZE(iommufd_hwpt_invalidate_info_size))
> > +		return -EOPNOTSUPP;
> 
> This needs to do the standard check for zeros in unknown trailing data
> bit. Check that alloc does it too

Maybe it has been covered by the copy_struct_from_user(). Is it?

+	/*
+	 * Copy the needed fields before reusing the ucmd buffer, this
+	 * avoids memory allocation in this path.
+	 */
+	user_ptr = cmd->data_uptr;
+	user_data_len = cmd->data_len;
+
+	rc = copy_struct_from_user(cmd, klen,
+				   u64_to_user_ptr(user_ptr), user_data_len);

Regards,
Yi Liu
  
Jason Gunthorpe March 20, 2023, 12:48 p.m. UTC | #6
On Tue, Mar 14, 2023 at 04:18:21AM +0000, Liu, Yi L wrote:
> > From: Jason Gunthorpe <jgg@nvidia.com>
> > Sent: Saturday, March 11, 2023 1:50 AM
> > 
> > On Thu, Mar 09, 2023 at 12:09:05AM -0800, Yi Liu wrote:
> > > +int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd)
> > > +{
> > > +	struct iommu_hwpt_invalidate *cmd = ucmd->cmd;
> > > +	struct iommufd_hw_pagetable *hwpt;
> > > +	u64 user_ptr;
> > > +	u32 user_data_len, klen;
> > > +	int rc = 0;
> > > +
> > > +	/*
> > > +	 * For a user-managed HWPT, type should not be
> > IOMMU_HWPT_TYPE_DEFAULT.
> > > +	 * data_len should not exceed the size of
> > iommufd_invalidate_buffer.
> > > +	 */
> > > +	if (cmd->data_type == IOMMU_HWPT_TYPE_DEFAULT || !cmd-
> > >data_len ||
> > > +	    cmd->data_type >=
> > ARRAY_SIZE(iommufd_hwpt_invalidate_info_size))
> > > +		return -EOPNOTSUPP;
> > 
> > This needs to do the standard check for zeros in unknown trailing data
> > bit. Check that alloc does it too
> 
> Maybe it has been covered by the copy_struct_from_user(). Is it?

Yes

Jason
  

Patch

diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index 64e7cf7142e1..67facca98de1 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -284,3 +284,59 @@  int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
 	iommufd_put_object(&idev->obj);
 	return rc;
 }
+
+/*
+ * size of page table type specific invalidate_info, indexed by
+ * enum iommu_hwpt_type.
+ */
+static const size_t iommufd_hwpt_invalidate_info_size[] = {};
+
+int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd)
+{
+	struct iommu_hwpt_invalidate *cmd = ucmd->cmd;
+	struct iommufd_hw_pagetable *hwpt;
+	u64 user_ptr;
+	u32 user_data_len, klen;
+	int rc = 0;
+
+	/*
+	 * For a user-managed HWPT, type should not be IOMMU_HWPT_TYPE_DEFAULT.
+	 * data_len should not exceed the size of iommufd_invalidate_buffer.
+	 */
+	if (cmd->data_type == IOMMU_HWPT_TYPE_DEFAULT || !cmd->data_len ||
+	    cmd->data_type >= ARRAY_SIZE(iommufd_hwpt_invalidate_info_size))
+		return -EOPNOTSUPP;
+
+	hwpt = iommufd_get_hwpt(ucmd, cmd->hwpt_id);
+	if (IS_ERR(hwpt))
+		return PTR_ERR(hwpt);
+
+	/* Do not allow any kernel-managed hw_pagetable */
+	if (!hwpt->parent) {
+		rc = -EINVAL;
+		goto out_put_hwpt;
+	}
+
+	klen = iommufd_hwpt_invalidate_info_size[cmd->data_type];
+	if (!klen) {
+		rc = -EINVAL;
+		goto out_put_hwpt;
+	}
+
+	/*
+	 * Copy the needed fields before reusing the ucmd buffer, this
+	 * avoids memory allocation in this path.
+	 */
+	user_ptr = cmd->data_uptr;
+	user_data_len = cmd->data_len;
+
+	rc = copy_struct_from_user(cmd, klen,
+				   u64_to_user_ptr(user_ptr), user_data_len);
+	if (rc)
+		goto out_put_hwpt;
+
+	hwpt->domain->ops->cache_invalidate_user(hwpt->domain, cmd);
+out_put_hwpt:
+	iommufd_put_object(&hwpt->obj);
+	return rc;
+}
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 182c074eecdc..d879264d1acf 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -265,6 +265,7 @@  struct iommufd_hw_pagetable *
 iommufd_hw_pagetable_detach(struct iommufd_device *idev);
 void iommufd_hw_pagetable_destroy(struct iommufd_object *obj);
 int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd);
+int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd);
 
 static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx,
 					    struct iommufd_hw_pagetable *hwpt)
@@ -276,6 +277,14 @@  static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx,
 		refcount_dec(&hwpt->obj.users);
 }
 
+static inline struct iommufd_hw_pagetable *
+iommufd_get_hwpt(struct iommufd_ucmd *ucmd, u32 id)
+{
+	return container_of(iommufd_get_object(ucmd->ictx, id,
+					       IOMMUFD_OBJ_HW_PAGETABLE),
+			    struct iommufd_hw_pagetable, obj);
+}
+
 struct iommufd_group {
 	struct kref ref;
 	struct mutex lock;
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 7ab1e2c638a1..2cf45f65b637 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -263,6 +263,7 @@  union ucmd_buffer {
 	struct iommu_destroy destroy;
 	struct iommu_hwpt_alloc hwpt;
 	struct iommu_hw_info info;
+	struct iommu_hwpt_invalidate cache;
 	struct iommu_ioas_alloc alloc;
 	struct iommu_ioas_allow_iovas allow_iovas;
 	struct iommu_ioas_copy ioas_copy;
@@ -298,6 +299,8 @@  static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
 		 data_uptr),
 	IOCTL_OP(IOMMU_DEVICE_GET_HW_INFO, iommufd_device_get_hw_info,
 		 struct iommu_hw_info, __reserved),
+	IOCTL_OP(IOMMU_HWPT_INVALIDATE, iommufd_hwpt_invalidate,
+		 struct iommu_hwpt_invalidate, data_uptr),
 	IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl,
 		 struct iommu_ioas_alloc, out_ioas_id),
 	IOCTL_OP(IOMMU_IOAS_ALLOW_IOVAS, iommufd_ioas_allow_iovas,
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 48781ff40a37..d0962c41f8d6 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -47,6 +47,7 @@  enum {
 	IOMMUFD_CMD_VFIO_IOAS,
 	IOMMUFD_CMD_HWPT_ALLOC,
 	IOMMUFD_CMD_DEVICE_GET_HW_INFO,
+	IOMMUFD_CMD_HWPT_INVALIDATE,
 };
 
 /**
@@ -447,4 +448,30 @@  struct iommu_hw_info {
 	__u32 __reserved;
 };
 #define IOMMU_DEVICE_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DEVICE_GET_HW_INFO)
+
+/**
+ * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE)
+ * @size: sizeof(struct iommu_hwpt_invalidate)
+ * @hwpt_id: HWPT ID of target hardware page table for the invalidation
+ * @data_type: One of enum iommu_hwpt_type
+ * @data_len: Length of the type specific data
+ * @data_uptr: User pointer to the type specific data
+ *
+ * Invalidate the iommu cache for user-managed page table. Modifications
+ * on user-managed page table should be followed with this operation to
+ * sync the IOTLB. This is only needed by user-managed hw_pagetables, so
+ * the @data_type should never be IOMMU_HWPT_TYPE_DEFAULT.
+ *
+ * +==============================+========================================+
+ * | @data_type                   |     Data structure in @data_uptr       |
+ * +------------------------------+----------------------------------------+
+ */
+struct iommu_hwpt_invalidate {
+	__u32 size;
+	__u32 hwpt_id;
+	__u32 data_type;
+	__u32 data_len;
+	__aligned_u64 data_uptr;
+};
+#define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE)
 #endif