[v2,3/3] iommufd/device: Change iommufd_hw_pagetable_has_group to device centric

Message ID 002911839dd30990d5e3135f8a0f8d41f14e856b.1674939002.git.nicolinc@nvidia.com
State New
Headers
Series iommufd: Remove iommufd_hw_pagetable_has_group |

Commit Message

Nicolin Chen Jan. 28, 2023, 9:18 p.m. UTC
  The iommufd_hw_pagetable_has_group() is a little hack to check whether
a hw_pagetable has an idev->group attached. Not only is this not
device-centric, but it also requires the hw_pagetable to maintain a
device list with a devices_lock, which gets overcomplicated among the
routines for
different use cases, upcoming nested hwpts especially.

Since we can compare the iommu_group->domain and the hwpt->domain to
serve the same purpose while being device centric, change it and drop
the device list with the devices_lock accordingly.

Note that, in the detach routine, it previously checked !has_group as
there was a list_del on the device list. But now, the device is still
being attached to the hwpt, so the if logic gets inverted.

Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
 drivers/iommu/iommufd/device.c          | 40 ++++++++-----------------
 drivers/iommu/iommufd/hw_pagetable.c    |  5 ----
 drivers/iommu/iommufd/iommufd_private.h |  2 --
 3 files changed, 12 insertions(+), 35 deletions(-)
  

Comments

Tian, Kevin Jan. 29, 2023, 9:37 a.m. UTC | #1
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Sunday, January 29, 2023 5:18 AM
> 
> -static bool iommufd_hw_pagetable_has_group(struct
> iommufd_hw_pagetable *hwpt,
> -					   struct iommu_group *group)
> +static bool iommufd_hw_pagetable_has_device(struct
> iommufd_hw_pagetable *hwpt,
> +					    struct device *dev)
>  {
> -	struct iommufd_device *cur_dev;
> -
> -	list_for_each_entry(cur_dev, &hwpt->devices, devices_item)
> -		if (cur_dev->group == group)
> -			return true;
> -	return false;
> +	/*
> +	 * iommu_get_domain_for_dev() returns an iommu_group->domain
> ptr, if it
> +	 * is the same domain as the hwpt->domain, it means that this hwpt
> has
> +	 * the iommu_group/device.
> +	 */
> +	return hwpt->domain == iommu_get_domain_for_dev(dev);
>  }

Here we could have three scenarios:

1) the device is attached to blocked domain;
2) the device is attached to hwpt->domain;
3) the device is attached to another hwpt->domain;

if this function returns false then iommufd_device_do_attach() will attach
the device to the specified hwpt. But then it's wrong for 3).

Has 3) been denied in earlier path? If yes at least a WARN_ON for
case 3) makes sense here.

> @@ -385,10 +372,8 @@ void iommufd_device_detach(struct
> iommufd_device *idev)
>  	struct iommufd_hw_pagetable *hwpt = idev->hwpt;
> 
>  	mutex_lock(&hwpt->ioas->mutex);
> -	mutex_lock(&hwpt->devices_lock);
>  	refcount_dec(hwpt->devices_users);
> -	list_del(&idev->devices_item);
> -	if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) {
> +	if (iommufd_hw_pagetable_has_device(hwpt, idev->dev)) {
>  		if (refcount_read(hwpt->devices_users) == 1) {
>  			iopt_table_remove_domain(&hwpt->ioas->iopt,
>  						 hwpt->domain);
> @@ -397,7 +382,6 @@ void iommufd_device_detach(struct iommufd_device
> *idev)
>  		iommu_detach_group(hwpt->domain, idev->group);
>  	}

emmm how do we track last device detach in a group? Here the first
device detach already leads to group detach...
  
Nicolin Chen Jan. 29, 2023, 10:38 a.m. UTC | #2
On Sun, Jan 29, 2023 at 09:37:00AM +0000, Tian, Kevin wrote:
> External email: Use caution opening links or attachments
> 
> 
> > From: Nicolin Chen <nicolinc@nvidia.com>
> > Sent: Sunday, January 29, 2023 5:18 AM
> >
> > -static bool iommufd_hw_pagetable_has_group(struct
> > iommufd_hw_pagetable *hwpt,
> > -                                        struct iommu_group *group)
> > +static bool iommufd_hw_pagetable_has_device(struct
> > iommufd_hw_pagetable *hwpt,
> > +                                         struct device *dev)
> >  {
> > -     struct iommufd_device *cur_dev;
> > -
> > -     list_for_each_entry(cur_dev, &hwpt->devices, devices_item)
> > -             if (cur_dev->group == group)
> > -                     return true;
> > -     return false;
> > +     /*
> > +      * iommu_get_domain_for_dev() returns an iommu_group->domain
> > ptr, if it
> > +      * is the same domain as the hwpt->domain, it means that this hwpt
> > has
> > +      * the iommu_group/device.
> > +      */
> > +     return hwpt->domain == iommu_get_domain_for_dev(dev);
> >  }
> 
> Here we could have three scenarios:
> 
> 1) the device is attached to blocked domain;
> 2) the device is attached to hwpt->domain;
> 3) the device is attached to another hwpt->domain;
> 
> if this function returns false then iommufd_device_do_attach() will attach
> the device to the specified hwpt. But then it's wrong for 3).
> 
> Has 3) been denied in earlier path? If yes at least a WARN_ON for
> case 3) makes sense here.

The case #3 means the device is already attached to some other
domain? Then vfio_iommufd_physical_attach_ioas returns -EBUSY
at the sanity check of vdev->iommufd_attached. And the case #3 feels
like a domain replacement use case to me. So probably not that
necessary to add a WARN_ON?

> > @@ -385,10 +372,8 @@ void iommufd_device_detach(struct
> > iommufd_device *idev)
> >       struct iommufd_hw_pagetable *hwpt = idev->hwpt;
> >
> >       mutex_lock(&hwpt->ioas->mutex);
> > -     mutex_lock(&hwpt->devices_lock);
> >       refcount_dec(hwpt->devices_users);
> > -     list_del(&idev->devices_item);
> > -     if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) {
> > +     if (iommufd_hw_pagetable_has_device(hwpt, idev->dev)) {
> >               if (refcount_read(hwpt->devices_users) == 1) {
> >                       iopt_table_remove_domain(&hwpt->ioas->iopt,
> >                                                hwpt->domain);
> > @@ -397,7 +382,6 @@ void iommufd_device_detach(struct iommufd_device
> > *idev)
> >               iommu_detach_group(hwpt->domain, idev->group);
> >       }
> 
> emmm how do we track last device detach in a group? Here the first
> device detach already leads to group detach...

Oh no. That's a bug. Thanks for catching it.

We need an additional refcount somewhere to track the number of
attached devices in the iommu_group.

Nicolin
  
Tian, Kevin Jan. 30, 2023, 12:44 a.m. UTC | #3
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Sunday, January 29, 2023 6:39 PM
> 
> On Sun, Jan 29, 2023 at 09:37:00AM +0000, Tian, Kevin wrote:
> > External email: Use caution opening links or attachments
> >
> >
> > > From: Nicolin Chen <nicolinc@nvidia.com>
> > > Sent: Sunday, January 29, 2023 5:18 AM
> > >
> > > -static bool iommufd_hw_pagetable_has_group(struct
> > > iommufd_hw_pagetable *hwpt,
> > > -                                        struct iommu_group *group)
> > > +static bool iommufd_hw_pagetable_has_device(struct
> > > iommufd_hw_pagetable *hwpt,
> > > +                                         struct device *dev)
> > >  {
> > > -     struct iommufd_device *cur_dev;
> > > -
> > > -     list_for_each_entry(cur_dev, &hwpt->devices, devices_item)
> > > -             if (cur_dev->group == group)
> > > -                     return true;
> > > -     return false;
> > > +     /*
> > > +      * iommu_get_domain_for_dev() returns an iommu_group->domain
> > > ptr, if it
> > > +      * is the same domain as the hwpt->domain, it means that this hwpt
> > > has
> > > +      * the iommu_group/device.
> > > +      */
> > > +     return hwpt->domain == iommu_get_domain_for_dev(dev);
> > >  }
> >
> > Here we could have three scenarios:
> >
> > 1) the device is attached to blocked domain;
> > 2) the device is attached to hwpt->domain;
> > 3) the device is attached to another hwpt->domain;
> >
> > if this function returns false then iommufd_device_do_attach() will attach
> > the device to the specified hwpt. But then it's wrong for 3).
> >
> > Has 3) been denied in earlier path? If yes at least a WARN_ON for
> > case 3) makes sense here.
> 
> The case #3 means the device is already attached to some other
> domain? Then vfio_iommufd_physical_attach_ioas returns -EBUSY
> at the sanity of vdev->iommufd_attached. And the case #3 feels
> like a domain replacement use case to me. So probably not that
> necessary to add a WARN_ON?
> 

You are right. I thought about the cdev case where the device is
not attached in vfio but has a valid domain due to attach status
of other devices in the group. But even in this case it's user's
responsibility to not break group boundary. So yes it's just a
domain replacement and WARN_ON is not required.
  
Nicolin Chen Jan. 30, 2023, 10:22 a.m. UTC | #4
On Sun, Jan 29, 2023 at 02:38:55AM -0800, Nicolin Chen wrote:
 
> > > @@ -385,10 +372,8 @@ void iommufd_device_detach(struct
> > > iommufd_device *idev)
> > >       struct iommufd_hw_pagetable *hwpt = idev->hwpt;
> > >
> > >       mutex_lock(&hwpt->ioas->mutex);
> > > -     mutex_lock(&hwpt->devices_lock);
> > >       refcount_dec(hwpt->devices_users);
> > > -     list_del(&idev->devices_item);
> > > -     if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) {
> > > +     if (iommufd_hw_pagetable_has_device(hwpt, idev->dev)) {
> > >               if (refcount_read(hwpt->devices_users) == 1) {
> > >                       iopt_table_remove_domain(&hwpt->ioas->iopt,
> > >                                                hwpt->domain);
> > > @@ -397,7 +382,6 @@ void iommufd_device_detach(struct iommufd_device
> > > *idev)
> > >               iommu_detach_group(hwpt->domain, idev->group);
> > >       }
> > 
> > emmm how do we track last device detach in a group? Here the first
> > device detach already leads to group detach...
> 
> Oh no. That's a bug. Thanks for catching it.
> 
> We need an additional refcount somewhere to track the number of
> attached devices in the iommu_group.

Wondering if we can let iommu_attach/detach_device handle this:

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index d0d7c2177ad6..b38f71e92e2a 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -57,6 +57,7 @@ struct iommu_group {
 	struct iommu_domain *domain;
 	struct list_head entry;
 	unsigned int owner_cnt;
+	unsigned int attached_cnt;
 	void *owner;
 };
 
@@ -64,6 +65,7 @@ struct group_device {
 	struct list_head list;
 	struct device *dev;
 	char *name;
+	bool attached;
 };
 
 struct iommu_group_attribute {
@@ -2035,6 +2037,7 @@ static int __iommu_attach_device(struct iommu_domain *domain,
  */
 int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
 {
+	struct group_device *grp_dev;
 	struct iommu_group *group;
 	int ret;
 
@@ -2042,16 +2045,22 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
 	if (!group)
 		return -ENODEV;
 
-	/*
-	 * Lock the group to make sure the device-count doesn't
-	 * change while we are attaching
-	 */
 	mutex_lock(&group->mutex);
-	ret = -EINVAL;
-	if (iommu_group_device_count(group) != 1)
+
+	list_for_each_entry(grp_dev, &group->devices, list)
+		if (grp_dev->dev == dev)
+			break;
+	if (grp_dev->attached)
 		goto out_unlock;
 
-	ret = __iommu_attach_group(domain, group);
+	/* Attach the group when attaching the first device in the group */
+	if (group->attached_cnt == 0) {
+		ret = __iommu_attach_group(domain, group);
+		if (ret)
+			goto out_unlock;
+	}
+	grp_dev->attached = true;
+	group->attached_cnt++;
 
 out_unlock:
 	mutex_unlock(&group->mutex);
@@ -2071,6 +2080,7 @@ int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain)
 
 void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
 {
+	struct group_device *grp_dev;
 	struct iommu_group *group;
 
 	group = iommu_group_get(dev);
@@ -2078,10 +2088,20 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
 		return;
 
 	mutex_lock(&group->mutex);
-	if (WARN_ON(domain != group->domain) ||
-	    WARN_ON(iommu_group_device_count(group) != 1))
+	if (WARN_ON(domain != group->domain))
 		goto out_unlock;
-	__iommu_group_set_core_domain(group);
+
+	list_for_each_entry(grp_dev, &group->devices, list)
+		if (grp_dev->dev == dev)
+			break;
+	if (!grp_dev->attached)
+		goto out_unlock;
+
+	grp_dev->attached = false;
+	group->attached_cnt--;
+	/* Detach the group when detaching the last device in the group */
+	if (group->attached_cnt == 0)
+		__iommu_group_set_core_domain(group);
 
 out_unlock:
 	mutex_unlock(&group->mutex);
  
Tian, Kevin Feb. 1, 2023, 3:07 a.m. UTC | #5
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Monday, January 30, 2023 6:22 PM
> 
> On Sun, Jan 29, 2023 at 02:38:55AM -0800, Nicolin Chen wrote:
> 
> > > > @@ -385,10 +372,8 @@ void iommufd_device_detach(struct
> > > > iommufd_device *idev)
> > > >       struct iommufd_hw_pagetable *hwpt = idev->hwpt;
> > > >
> > > >       mutex_lock(&hwpt->ioas->mutex);
> > > > -     mutex_lock(&hwpt->devices_lock);
> > > >       refcount_dec(hwpt->devices_users);
> > > > -     list_del(&idev->devices_item);
> > > > -     if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) {
> > > > +     if (iommufd_hw_pagetable_has_device(hwpt, idev->dev)) {
> > > >               if (refcount_read(hwpt->devices_users) == 1) {
> > > >                       iopt_table_remove_domain(&hwpt->ioas->iopt,
> > > >                                                hwpt->domain);
> > > > @@ -397,7 +382,6 @@ void iommufd_device_detach(struct
> iommufd_device
> > > > *idev)
> > > >               iommu_detach_group(hwpt->domain, idev->group);
> > > >       }
> > >
> > > emmm how do we track last device detach in a group? Here the first
> > > device detach already leads to group detach...
> >
> > Oh no. That's a bug. Thanks for catching it.
> >
> > We need an additional refcount somewhere to track the number of
> > attached devices in the iommu_group.
> 
> Wondering if we can let iommu_attach/detach_device handle this:
> 

that is the desired way to fully remove group awareness in iommufd.

but iirc there were some concerns on changing their semantics. But
I don't remember the detail now. Jason might know. also +Baolu/Robin.

otherwise as long as the group attach/detach continues to be used
then identifying last device in the group always needs some hack
within iommufd itself.
  
Baolu Lu Feb. 1, 2023, 6:49 a.m. UTC | #6
On 2023/2/1 11:07, Tian, Kevin wrote:
>> From: Nicolin Chen <nicolinc@nvidia.com>
>> Sent: Monday, January 30, 2023 6:22 PM
>>
>> On Sun, Jan 29, 2023 at 02:38:55AM -0800, Nicolin Chen wrote:
>>
>>>>> @@ -385,10 +372,8 @@ void iommufd_device_detach(struct
>>>>> iommufd_device *idev)
>>>>>        struct iommufd_hw_pagetable *hwpt = idev->hwpt;
>>>>>
>>>>>        mutex_lock(&hwpt->ioas->mutex);
>>>>> -     mutex_lock(&hwpt->devices_lock);
>>>>>        refcount_dec(hwpt->devices_users);
>>>>> -     list_del(&idev->devices_item);
>>>>> -     if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) {
>>>>> +     if (iommufd_hw_pagetable_has_device(hwpt, idev->dev)) {
>>>>>                if (refcount_read(hwpt->devices_users) == 1) {
>>>>>                        iopt_table_remove_domain(&hwpt->ioas->iopt,
>>>>>                                                 hwpt->domain);
>>>>> @@ -397,7 +382,6 @@ void iommufd_device_detach(struct
>> iommufd_device
>>>>> *idev)
>>>>>                iommu_detach_group(hwpt->domain, idev->group);
>>>>>        }
>>>>
>>>> emmm how do we track last device detach in a group? Here the first
>>>> device detach already leads to group detach...
>>>
>>> Oh no. That's a bug. Thanks for catching it.
>>>
>>> We need an additional refcount somewhere to track the number of
>>> attached devices in the iommu_group.
>>
>> Wondering if we can let iommu_attach/detach_device handle this:
>>
> 
> that is the desired way to fully remove group awareness in iommufd.
> 
> but iirc there were some concerns on changing their semantics. But
> I don't remember the detail now. Jason might know. also +Baolu/Robin.
> 
> otherwise as long as the group attach/detach continues to be used
> then identifying last device in the group always needs some hack
> within iommufd itself.

I have tried to solve this problem.

https://lore.kernel.org/linux-iommu/20220106022053.2406748-1-baolu.lu@linux.intel.com/

I may need to review the original discussion to see if I can update a
new version.

Best regards,
baolu
  
Tian, Kevin Feb. 1, 2023, 6:59 a.m. UTC | #7
> From: Baolu Lu <baolu.lu@linux.intel.com>
> Sent: Wednesday, February 1, 2023 2:49 PM
> 
> On 2023/2/1 11:07, Tian, Kevin wrote:
> >> From: Nicolin Chen <nicolinc@nvidia.com>
> >> Sent: Monday, January 30, 2023 6:22 PM
> >>
> >> On Sun, Jan 29, 2023 at 02:38:55AM -0800, Nicolin Chen wrote:
> >>
> >>>>> @@ -385,10 +372,8 @@ void iommufd_device_detach(struct
> >>>>> iommufd_device *idev)
> >>>>>        struct iommufd_hw_pagetable *hwpt = idev->hwpt;
> >>>>>
> >>>>>        mutex_lock(&hwpt->ioas->mutex);
> >>>>> -     mutex_lock(&hwpt->devices_lock);
> >>>>>        refcount_dec(hwpt->devices_users);
> >>>>> -     list_del(&idev->devices_item);
> >>>>> -     if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) {
> >>>>> +     if (iommufd_hw_pagetable_has_device(hwpt, idev->dev)) {
> >>>>>                if (refcount_read(hwpt->devices_users) == 1) {
> >>>>>                        iopt_table_remove_domain(&hwpt->ioas->iopt,
> >>>>>                                                 hwpt->domain);
> >>>>> @@ -397,7 +382,6 @@ void iommufd_device_detach(struct
> >> iommufd_device
> >>>>> *idev)
> >>>>>                iommu_detach_group(hwpt->domain, idev->group);
> >>>>>        }
> >>>>
> >>>> emmm how do we track last device detach in a group? Here the first
> >>>> device detach already leads to group detach...
> >>>
> >>> Oh no. That's a bug. Thanks for catching it.
> >>>
> >>> We need an additional refcount somewhere to track the number of
> >>> attached devices in the iommu_group.
> >>
> >> Wondering if we can let iommu_attach/detach_device handle this:
> >>
> >
> > that is the desired way to fully remove group awareness in iommufd.
> >
> > but iirc there were some concerns on changing their semantics. But
> > I don't remember the detail now. Jason might know. also +Baolu/Robin.
> >
> > otherwise as long as the group attach/detach continues to be used
> > then identifying last device in the group always needs some hack
> > within iommufd itself.
> 
> I have tried to solve this problem.
> 
> https://lore.kernel.org/linux-iommu/20220106022053.2406748-1-
> baolu.lu@linux.intel.com/
> 
> I may need to review the original discussion to see if I can update a
> new version.
> 

emm looks there are quite some discussions to catch up.

anyway assuming this will happen, Nicolin do we still want this
preparatory series for coming nesting support?

iiuc the main motivation was on the complexity of s2 attaching
but with your discussion with Jason it looks solvable. Then if this
group hack can be separated from the nesting work it avoids
unnecessary dependency in-between.
  
Nicolin Chen Feb. 1, 2023, 7:20 a.m. UTC | #8
On Wed, Feb 01, 2023 at 06:59:21AM +0000, Tian, Kevin wrote:
> External email: Use caution opening links or attachments
> 
> 
> > From: Baolu Lu <baolu.lu@linux.intel.com>
> > Sent: Wednesday, February 1, 2023 2:49 PM
> >
> > On 2023/2/1 11:07, Tian, Kevin wrote:
> > >> From: Nicolin Chen <nicolinc@nvidia.com>
> > >> Sent: Monday, January 30, 2023 6:22 PM
> > >>
> > >> On Sun, Jan 29, 2023 at 02:38:55AM -0800, Nicolin Chen wrote:
> > >>
> > >>>>> @@ -385,10 +372,8 @@ void iommufd_device_detach(struct
> > >>>>> iommufd_device *idev)
> > >>>>>        struct iommufd_hw_pagetable *hwpt = idev->hwpt;
> > >>>>>
> > >>>>>        mutex_lock(&hwpt->ioas->mutex);
> > >>>>> -     mutex_lock(&hwpt->devices_lock);
> > >>>>>        refcount_dec(hwpt->devices_users);
> > >>>>> -     list_del(&idev->devices_item);
> > >>>>> -     if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) {
> > >>>>> +     if (iommufd_hw_pagetable_has_device(hwpt, idev->dev)) {
> > >>>>>                if (refcount_read(hwpt->devices_users) == 1) {
> > >>>>>                        iopt_table_remove_domain(&hwpt->ioas->iopt,
> > >>>>>                                                 hwpt->domain);
> > >>>>> @@ -397,7 +382,6 @@ void iommufd_device_detach(struct
> > >> iommufd_device
> > >>>>> *idev)
> > >>>>>                iommu_detach_group(hwpt->domain, idev->group);
> > >>>>>        }
> > >>>>
> > >>>> emmm how do we track last device detach in a group? Here the first
> > >>>> device detach already leads to group detach...
> > >>>
> > >>> Oh no. That's a bug. Thanks for catching it.
> > >>>
> > >>> We need an additional refcount somewhere to track the number of
> > >>> attached devices in the iommu_group.
> > >>
> > >> Wondering if we can let iommu_attach/detach_device handle this:
> > >>
> > >
> > > that is the desired way to fully remove group awareness in iommufd.
> > >
> > > but iirc there were some concerns on changing their semantics. But
> > > I don't remember the detail now. Jason might know. also +Baolu/Robin.
> > >
> > > otherwise as long as the group attach/detach continues to be used
> > > then identifying last device in the group always needs some hack
> > > within iommufd itself.
> >
> > I have tried to solve this problem.
> >
> > https://lore.kernel.org/linux-iommu/20220106022053.2406748-1-
> > baolu.lu@linux.intel.com/
> >
> > I may need to review the original discussion to see if I can update a
> > new version.
> >
> 
> emm looks there are quite some discussions to catch up.
> 
> anyway assuming this will happen, Nicolin do we still want this
> preparatory series for coming nesting support?
>
> iiuc the main motivation was on the complexity of s2 attaching
> but with your discussion with Jason looks it's solvable. Then if this
> group hack can be separated from the nesting work it avoids
> unnecessary dependency in-between.

The device list is going to overcomplicate either the nesting
series or the replace series. And Jason asked me to send the
replace series prior to our nesting series.

Hoping that we can eliminate the dependencies between those two
series, I took these patches out and sent them separately as a way
to clean things up. Yet, I was naive in thinking that I could do it
with a smaller patchset.

So, assuming we drop this series and move the first two patches
back to the nesting series or the replace series, one of them
would end up doing something ugly:

	if (cur_hwpt != hwpt)
		mutex_lock(&cur_hwpt->device_lock);
	mutex_lock(&hwpt->device_lock);
	...
	mutex_unlock(&hwpt->device_lock);
	if (cur_hwpt != hwpt)
		mutex_unlock(&cur_hwpt->device_lock);

So, perhaps we should discuss about which way we want to choose.

Btw, Baolu's version has a similar patch as mine changing the
iommu_attach/detach_device(), yet also touches _group(). Could
we bisect that series into _device() first and _group() later?
Given that we only need a device-centric API at this moment...

Thanks
Nic
  
Tian, Kevin Feb. 2, 2023, 6:32 a.m. UTC | #9
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Wednesday, February 1, 2023 3:20 PM
> 
> So, assuming we drop this series and move the first two patches
> back to the nesting series or the replace series, one of them
> would end up doing something ugly:
> 
> 	if (cur_hwpt != hwpt)
> 		mutex_lock(&cur_hwpt->device_lock);
> 	mutex_lock(&hwpt->device_lock);
> 	...
> 	mutex_unlock(&hwpt->device_lock);
> 	if (cur_hwpt != hwpt)
> 		mutex_unlock(&cur_hwpt->device_lock);
> 
> So, perhaps we should discuss about which way we want to choose.

from your discussion with Jason I think this locking open has
been settled down.

> 
> Btw, Baolu's version has a similar patch as mine changing the
> iommu_attach/detach_device(), yet also touches _group(). Could
> we bisect that series into _device() first and _group() later?
> Given that we only need a device-centric API at this moment...
> 

I'll let Baolu to decide after he re-catches up the comments in
that thread. But overall I think we now agreed that removing the
device list/lock can be kept out of your replace/nesting series
and let it cleaned up after Baolu's work completes, correct? 😊
  
Nicolin Chen Feb. 2, 2023, 6:36 a.m. UTC | #10
On Thu, Feb 02, 2023 at 06:32:36AM +0000, Tian, Kevin wrote:
> External email: Use caution opening links or attachments
> 
> 
> > From: Nicolin Chen <nicolinc@nvidia.com>
> > Sent: Wednesday, February 1, 2023 3:20 PM
> >
> > So, assuming we drop this series and move the first two patches
> > back to the nesting series or the replace series, one of them
> > would end up doing something ugly:
> >
> >       if (cur_hwpt != hwpt)
> >               mutex_lock(&cur_hwpt->device_lock);
> >       mutex_lock(&hwpt->device_lock);
> >       ...
> >       mutex_unlock(&hwpt->device_lock);
> >       if (cur_hwpt != hwpt)
> >               mutex_unlock(&cur_hwpt->device_lock);
> >
> > So, perhaps we should discuss about which way we want to choose.
> 
> from your discussion with Jason I think this locking open has
> been settled down.

Yes :)

> >
> > Btw, Baolu's version has a similar patch as mine changing the
> > iommu_attach/detach_device(), yet also touches _group(). Could
> > we bisect that series into _device() first and _group() later?
> > Given that we only need a device-centric API at this moment...
> >
> 
> I'll let Baolu to decide after he re-catches up the comments in
> that thread. But overall I think we now agreed that removing the
> device list/lock can be kept out of your replace/nesting series
> and let it cleaned up after Baolu's work completes, correct? 😊

Correct. It's not a blocker. And I am going to post the replace
series today -- running some additional sanity now.

Thanks
Nic
  

Patch

diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 9375fcac884c..f582e59cc51c 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -24,8 +24,6 @@  struct iommufd_device {
 	struct iommufd_object obj;
 	struct iommufd_ctx *ictx;
 	struct iommufd_hw_pagetable *hwpt;
-	/* Head at iommufd_hw_pagetable::devices */
-	struct list_head devices_item;
 	/* always the physical device */
 	struct device *dev;
 	struct iommu_group *group;
@@ -181,15 +179,15 @@  static int iommufd_device_setup_msi(struct iommufd_device *idev,
 	return 0;
 }
 
-static bool iommufd_hw_pagetable_has_group(struct iommufd_hw_pagetable *hwpt,
-					   struct iommu_group *group)
+static bool iommufd_hw_pagetable_has_device(struct iommufd_hw_pagetable *hwpt,
+					    struct device *dev)
 {
-	struct iommufd_device *cur_dev;
-
-	list_for_each_entry(cur_dev, &hwpt->devices, devices_item)
-		if (cur_dev->group == group)
-			return true;
-	return false;
+	/*
+	 * iommu_get_domain_for_dev() returns an iommu_group->domain ptr, if it
+	 * is the same domain as the hwpt->domain, it means that this hwpt has
+	 * the iommu_group/device.
+	 */
+	return hwpt->domain == iommu_get_domain_for_dev(dev);
 }
 
 static int iommufd_device_do_attach(struct iommufd_device *idev,
@@ -200,8 +198,6 @@  static int iommufd_device_do_attach(struct iommufd_device *idev,
 
 	lockdep_assert_held(&hwpt->ioas->mutex);
 
-	mutex_lock(&hwpt->devices_lock);
-
 	/*
 	 * Try to upgrade the domain we have, it is an iommu driver bug to
 	 * report IOMMU_CAP_ENFORCE_CACHE_COHERENCY but fail
@@ -215,25 +211,20 @@  static int iommufd_device_do_attach(struct iommufd_device *idev,
 					hwpt->domain);
 		if (!hwpt->enforce_cache_coherency) {
 			WARN_ON(refcount_read(hwpt->devices_users) == 1);
-			rc = -EINVAL;
-			goto out_unlock;
+			return -EINVAL;
 		}
 	}
 
 	rc = iopt_table_enforce_group_resv_regions(&hwpt->ioas->iopt, idev->dev,
 						   idev->group, &sw_msi_start);
 	if (rc)
-		goto out_unlock;
+		return rc;
 
 	rc = iommufd_device_setup_msi(idev, hwpt, sw_msi_start);
 	if (rc)
 		goto out_iova;
 
-	/*
-	 * FIXME: Hack around missing a device-centric iommu api, only attach to
-	 * the group once for the first device that is in the group.
-	 */
-	if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) {
+	if (!iommufd_hw_pagetable_has_device(hwpt, idev->dev)) {
 		rc = iommu_attach_group(hwpt->domain, idev->group);
 		if (rc)
 			goto out_iova;
@@ -250,16 +241,12 @@  static int iommufd_device_do_attach(struct iommufd_device *idev,
 	idev->hwpt = hwpt;
 	refcount_inc(&hwpt->obj.users);
 	refcount_inc(hwpt->devices_users);
-	list_add(&idev->devices_item, &hwpt->devices);
-	mutex_unlock(&hwpt->devices_lock);
 	return 0;
 
 out_detach:
 	iommu_detach_group(hwpt->domain, idev->group);
 out_iova:
 	iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev);
-out_unlock:
-	mutex_unlock(&hwpt->devices_lock);
 	return rc;
 }
 
@@ -385,10 +372,8 @@  void iommufd_device_detach(struct iommufd_device *idev)
 	struct iommufd_hw_pagetable *hwpt = idev->hwpt;
 
 	mutex_lock(&hwpt->ioas->mutex);
-	mutex_lock(&hwpt->devices_lock);
 	refcount_dec(hwpt->devices_users);
-	list_del(&idev->devices_item);
-	if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) {
+	if (iommufd_hw_pagetable_has_device(hwpt, idev->dev)) {
 		if (refcount_read(hwpt->devices_users) == 1) {
 			iopt_table_remove_domain(&hwpt->ioas->iopt,
 						 hwpt->domain);
@@ -397,7 +382,6 @@  void iommufd_device_detach(struct iommufd_device *idev)
 		iommu_detach_group(hwpt->domain, idev->group);
 	}
 	iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev);
-	mutex_unlock(&hwpt->devices_lock);
 	mutex_unlock(&hwpt->ioas->mutex);
 
 	if (hwpt->auto_domain)
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index 910e759ffeac..868a126ff37d 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -11,11 +11,8 @@  void iommufd_hw_pagetable_destroy(struct iommufd_object *obj)
 	struct iommufd_hw_pagetable *hwpt =
 		container_of(obj, struct iommufd_hw_pagetable, obj);
 
-	WARN_ON(!list_empty(&hwpt->devices));
-
 	iommu_domain_free(hwpt->domain);
 	refcount_dec(&hwpt->ioas->obj.users);
-	mutex_destroy(&hwpt->devices_lock);
 	WARN_ON(!refcount_dec_if_one(hwpt->devices_users));
 	kfree(hwpt->devices_users);
 }
@@ -45,9 +42,7 @@  iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
 		goto out_abort;
 	}
 
-	INIT_LIST_HEAD(&hwpt->devices);
 	INIT_LIST_HEAD(&hwpt->hwpt_item);
-	mutex_init(&hwpt->devices_lock);
 	hwpt->devices_users = kzalloc(sizeof(*hwpt->devices_users), GFP_KERNEL);
 	if (!hwpt->devices_users) {
 		rc = -ENOMEM;
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index f128d77fb076..1c8e59b37f46 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -246,9 +246,7 @@  struct iommufd_hw_pagetable {
 	bool msi_cookie : 1;
 	/* Head at iommufd_ioas::hwpt_list */
 	struct list_head hwpt_item;
-	struct mutex devices_lock;
 	refcount_t *devices_users;
-	struct list_head devices;
 };
 
 struct iommufd_hw_pagetable *