[v1,7/8] iommufd/device: Use iommu_group_replace_domain()

Message ID de1cec7698e9b4e2ad03b7d9414b25d655fe5a6e.1675320212.git.nicolinc@nvidia.com
State New
Headers
Series Add IO page table replacement support |

Commit Message

Nicolin Chen Feb. 2, 2023, 7:05 a.m. UTC
  iommu_group_replace_domain() is introduced to support use cases where an
iommu_group can be attached to a new domain without getting detached from
the old one. This replacement feature will be useful, for cases such as:
1) vPASID mode, when a guest wants to replace a single pasid (PASID=0)
   table with a larger table (PASID=N)
2) Nesting mode, when switching the attaching device from an S2 domain
   to an S1 domain, or when switching between relevant S1 domains.
as it allows these cases to switch seamlessly without a DMA disruption.

So, call iommu_group_replace_domain() in the iommufd_device_do_attach().
And then decrease the refcount of the hwpt being replaced.

Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
 drivers/iommu/iommufd/device.c          | 26 +++++++++++++++++++++++--
 drivers/iommu/iommufd/iommufd_private.h |  2 ++
 2 files changed, 26 insertions(+), 2 deletions(-)
  

Comments

Tian, Kevin Feb. 6, 2023, 8:46 a.m. UTC | #1
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Thursday, February 2, 2023 3:05 PM
>
> @@ -246,6 +249,18 @@ static int iommufd_device_do_attach(struct
> iommufd_device *idev,
>  		}
>  	}
> 
> +	if (cur_hwpt) {
> +		/* Replace the cur_hwpt */
> +		mutex_lock(&cur_hwpt->devices_lock);
> +		if (cur_hwpt->ioas != hwpt->ioas)
> +			iopt_remove_reserved_iova(&cur_hwpt->ioas->iopt,
> +						  idev->dev);
> +		list_del(&cur_hwpt->hwpt_item);

emmm shouldn't this be done only when the device is the last
one attached to the hwpt? and if it's the last one you should
also iopt_table_remove_domain() together with list_del, i.e.
similar housekeeping as done in iommufd_device_detach().
  
Nicolin Chen Feb. 6, 2023, 7:17 p.m. UTC | #2
On Mon, Feb 06, 2023 at 08:46:04AM +0000, Tian, Kevin wrote:
> > From: Nicolin Chen <nicolinc@nvidia.com>
> > Sent: Thursday, February 2, 2023 3:05 PM
> >
> > @@ -246,6 +249,18 @@ static int iommufd_device_do_attach(struct
> > iommufd_device *idev,
> >               }
> >       }
> >
> > +     if (cur_hwpt) {
> > +             /* Replace the cur_hwpt */
> > +             mutex_lock(&cur_hwpt->devices_lock);
> > +             if (cur_hwpt->ioas != hwpt->ioas)
> > +                     iopt_remove_reserved_iova(&cur_hwpt->ioas->iopt,
> > +                                               idev->dev);
> > +             list_del(&cur_hwpt->hwpt_item);
> 
> emmm shouldn't this be done only when the device is the last
> one attached to the hwpt? and if it's the last one you should
> also iopt_table_remove_domain() together with list_del, i.e.
> similar housekeeping as done in iommufd_device_detach().

You are right. I had another patch on top of this series,
moving this list_del() and iopt_table_remove_domain() to
the destroy() callback, so I overlooked.

And I just found that the list_add_del(hwpt_item) in the
IOMMUFD_OBJ_HW_PAGETABLE case doesn't seem to call at the
first device's attachment. So, I think that we might need
my previous "symmetric" patch in this series too.

Will fix in v2. Thanks!

Nic
  
Tian, Kevin Feb. 7, 2023, 12:37 a.m. UTC | #3
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Tuesday, February 7, 2023 3:18 AM
> 
> On Mon, Feb 06, 2023 at 08:46:04AM +0000, Tian, Kevin wrote:
> > > From: Nicolin Chen <nicolinc@nvidia.com>
> > > Sent: Thursday, February 2, 2023 3:05 PM
> > >
> > > @@ -246,6 +249,18 @@ static int iommufd_device_do_attach(struct
> > > iommufd_device *idev,
> > >               }
> > >       }
> > >
> > > +     if (cur_hwpt) {
> > > +             /* Replace the cur_hwpt */
> > > +             mutex_lock(&cur_hwpt->devices_lock);
> > > +             if (cur_hwpt->ioas != hwpt->ioas)
> > > +                     iopt_remove_reserved_iova(&cur_hwpt->ioas->iopt,
> > > +                                               idev->dev);
> > > +             list_del(&cur_hwpt->hwpt_item);
> >
> > emmm shouldn't this be done only when the device is the last
> > one attached to the hwpt? and if it's the last one you should
> > also iopt_table_remove_domain() together with list_del, i.e.
> > similar housekeeping as done in iommufd_device_detach().
> 
> You are right. I had another patch on top of this series,
> moving this list_del() and iopt_table_remove_domain() to
> the destroy() callback, so I overlooked.
> 
> And I just found that the list_add_del(hwpt_item) in the
> IOMMUFD_OBJ_HW_PAGETABLE case doesn't seem to call at the
> first device's attachment. So, I think that we might need
> my previous "symmetric" patch in this series too.
> 

Yes, that makes sense.
  

Patch

diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 8118d5262800..c7701e449f3d 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -9,6 +9,8 @@ 
 #include "io_pagetable.h"
 #include "iommufd_private.h"
 
+MODULE_IMPORT_NS(IOMMUFD_INTERNAL);
+
 static bool allow_unsafe_interrupts;
 module_param(allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(
@@ -197,6 +199,7 @@  static bool iommufd_hw_pagetable_has_group(struct iommufd_hw_pagetable *hwpt,
 static int iommufd_device_do_attach(struct iommufd_device *idev,
 				    struct iommufd_hw_pagetable *hwpt)
 {
+	struct iommufd_hw_pagetable *cur_hwpt = idev->hwpt;
 	phys_addr_t sw_msi_start = PHYS_ADDR_MAX;
 	int rc;
 
@@ -234,7 +237,7 @@  static int iommufd_device_do_attach(struct iommufd_device *idev,
 	 * the group once for the first device that is in the group.
 	 */
 	if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) {
-		rc = iommu_attach_group(hwpt->domain, idev->group);
+		rc = iommu_group_replace_domain(idev->group, hwpt->domain);
 		if (rc)
 			goto out_iova;
 
@@ -246,6 +249,18 @@  static int iommufd_device_do_attach(struct iommufd_device *idev,
 		}
 	}
 
+	if (cur_hwpt) {
+		/* Replace the cur_hwpt */
+		mutex_lock(&cur_hwpt->devices_lock);
+		if (cur_hwpt->ioas != hwpt->ioas)
+			iopt_remove_reserved_iova(&cur_hwpt->ioas->iopt,
+						  idev->dev);
+		list_del(&cur_hwpt->hwpt_item);
+		list_del(&idev->devices_item);
+		refcount_dec(&cur_hwpt->obj.users);
+		refcount_dec(&idev->obj.users);
+		mutex_unlock(&cur_hwpt->devices_lock);
+	}
 	idev->hwpt = hwpt;
 	refcount_inc(&hwpt->obj.users);
 	list_add(&idev->devices_item, &hwpt->devices);
@@ -253,7 +268,10 @@  static int iommufd_device_do_attach(struct iommufd_device *idev,
 	return 0;
 
 out_detach:
-	iommu_detach_group(hwpt->domain, idev->group);
+	if (cur_hwpt)
+		iommu_group_replace_domain(idev->group, cur_hwpt->domain);
+	else
+		iommu_detach_group(hwpt->domain, idev->group);
 out_iova:
 	iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev);
 out_unlock:
@@ -343,6 +361,9 @@  int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
 		struct iommufd_hw_pagetable *hwpt =
 			container_of(pt_obj, struct iommufd_hw_pagetable, obj);
 
+		if (idev->hwpt == hwpt)
+			goto out_done;
+
 		rc = iommufd_device_do_attach(idev, hwpt);
 		if (rc)
 			goto out_put_pt_obj;
@@ -367,6 +388,7 @@  int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
 	}
 
 	refcount_inc(&idev->obj.users);
+out_done:
 	*pt_id = idev->hwpt->obj.id;
 	rc = 0;
 
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 593138bb37b8..200c783800ad 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -9,6 +9,8 @@ 
 #include <linux/refcount.h>
 #include <linux/uaccess.h>
 
+#include "../iommu-priv.h"
+
 struct iommu_domain;
 struct iommu_group;
 struct iommu_option;