[v2,10/12] iommu: Make iommu_queue_iopf() more generic

Message ID 20230727054837.147050-11-baolu.lu@linux.intel.com
State New
Headers
Series iommu: Prepare to deliver page faults to user space |

Commit Message

Baolu Lu July 27, 2023, 5:48 a.m. UTC
  This completely separates the IO page fault handling framework from the
SVA implementation. Previously, the SVA implementation was tightly coupled
with the IO page fault handling framework. This makes SVA a "customer" of
the IO page fault handling framework by converting domain's page fault
handler to handle a group of faults and calling it directly from
iommu_queue_iopf().

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
 include/linux/iommu.h      |  5 +++--
 drivers/iommu/iommu-sva.h  |  8 --------
 drivers/iommu/io-pgfault.c | 16 +++++++++++++---
 drivers/iommu/iommu-sva.c  | 11 ++---------
 drivers/iommu/iommu.c      |  2 +-
 5 files changed, 19 insertions(+), 23 deletions(-)
  

Comments

Jason Gunthorpe Aug. 10, 2023, 7:07 p.m. UTC | #1
On Thu, Jul 27, 2023 at 01:48:35PM +0800, Lu Baolu wrote:
> @@ -137,6 +136,16 @@ int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev)
>  		return 0;
>  	}
>  
> +	if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID)
> +		domain = iommu_get_domain_for_dev_pasid(dev, fault->prm.pasid, 0);
> +	else
> +		domain = iommu_get_domain_for_dev(dev);

How does the lifetime work for this? What prevents UAF on domain?

> diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
> index ab42cfdd7636..668f4c2bcf65 100644
> --- a/drivers/iommu/iommu-sva.c
> +++ b/drivers/iommu/iommu-sva.c
> @@ -157,7 +157,7 @@ EXPORT_SYMBOL_GPL(iommu_sva_get_pasid);
>  /*
>   * I/O page fault handler for SVA
>   */
> -enum iommu_page_response_code
> +static enum iommu_page_response_code
>  iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)
>  {
>  	vm_fault_t ret;
> @@ -241,23 +241,16 @@ static void iopf_handler(struct work_struct *work)
>  {
>  	struct iopf_fault *iopf;
>  	struct iopf_group *group;
> -	struct iommu_domain *domain;
>  	enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;
>  
>  	group = container_of(work, struct iopf_group, work);
> -	domain = iommu_get_domain_for_dev_pasid(group->dev,
> -				group->last_fault.fault.prm.pasid, 0);
> -	if (!domain || !domain->iopf_handler)
> -		status = IOMMU_PAGE_RESP_INVALID;
> -
>  	list_for_each_entry(iopf, &group->faults, list) {
>  		/*
>  		 * For the moment, errors are sticky: don't handle subsequent
>  		 * faults in the group if there is an error.
>  		 */
>  		if (status == IOMMU_PAGE_RESP_SUCCESS)
> -			status = domain->iopf_handler(&iopf->fault,
> -						      domain->fault_data);
> +			status = iommu_sva_handle_iopf(&iopf->fault, group->data);
>  	}
>  
>  	iopf_complete_group(group->dev, &group->last_fault, status);
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 157a28a49473..535a36e3edc9 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -3330,7 +3330,7 @@ struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
>  	domain->type = IOMMU_DOMAIN_SVA;
>  	mmgrab(mm);
>  	domain->mm = mm;
> -	domain->iopf_handler = iommu_sva_handle_iopf;
> +	domain->iopf_handler = iommu_sva_handle_iopf_group;
>  	domain->fault_data = mm;

This also has lifetime problems on the mm.

The domain should flow into the iommu_sva_handle_iopf() instead of the
void *data.

The SVA code can then just use domain->mm directly.

We need to document/figure out how to ensure that the faults are
all done processing before a fault-enabled domain can be freed.

This patch would be better ordered before the prior patch.

Jason
  
Baolu Lu Aug. 11, 2023, 2:21 a.m. UTC | #2
On 2023/8/11 3:07, Jason Gunthorpe wrote:
> On Thu, Jul 27, 2023 at 01:48:35PM +0800, Lu Baolu wrote:
>> @@ -137,6 +136,16 @@ int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev)
>>   		return 0;
>>   	}
>>   
>> +	if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID)
>> +		domain = iommu_get_domain_for_dev_pasid(dev, fault->prm.pasid, 0);
>> +	else
>> +		domain = iommu_get_domain_for_dev(dev);
> 
> How does the lifetime work for this? What prevents UAF on domain?

Replied below.

> 
>> diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
>> index ab42cfdd7636..668f4c2bcf65 100644
>> --- a/drivers/iommu/iommu-sva.c
>> +++ b/drivers/iommu/iommu-sva.c
>> @@ -157,7 +157,7 @@ EXPORT_SYMBOL_GPL(iommu_sva_get_pasid);
>>   /*
>>    * I/O page fault handler for SVA
>>    */
>> -enum iommu_page_response_code
>> +static enum iommu_page_response_code
>>   iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)
>>   {
>>   	vm_fault_t ret;
>> @@ -241,23 +241,16 @@ static void iopf_handler(struct work_struct *work)
>>   {
>>   	struct iopf_fault *iopf;
>>   	struct iopf_group *group;
>> -	struct iommu_domain *domain;
>>   	enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;
>>   
>>   	group = container_of(work, struct iopf_group, work);
>> -	domain = iommu_get_domain_for_dev_pasid(group->dev,
>> -				group->last_fault.fault.prm.pasid, 0);
>> -	if (!domain || !domain->iopf_handler)
>> -		status = IOMMU_PAGE_RESP_INVALID;
>> -
>>   	list_for_each_entry(iopf, &group->faults, list) {
>>   		/*
>>   		 * For the moment, errors are sticky: don't handle subsequent
>>   		 * faults in the group if there is an error.
>>   		 */
>>   		if (status == IOMMU_PAGE_RESP_SUCCESS)
>> -			status = domain->iopf_handler(&iopf->fault,
>> -						      domain->fault_data);
>> +			status = iommu_sva_handle_iopf(&iopf->fault, group->data);
>>   	}
>>   
>>   	iopf_complete_group(group->dev, &group->last_fault, status);
>> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
>> index 157a28a49473..535a36e3edc9 100644
>> --- a/drivers/iommu/iommu.c
>> +++ b/drivers/iommu/iommu.c
>> @@ -3330,7 +3330,7 @@ struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
>>   	domain->type = IOMMU_DOMAIN_SVA;
>>   	mmgrab(mm);
>>   	domain->mm = mm;
>> -	domain->iopf_handler = iommu_sva_handle_iopf;
>> +	domain->iopf_handler = iommu_sva_handle_iopf_group;
>>   	domain->fault_data = mm;
> 
> This also has lifetime problems on the mm.
> 
> The domain should flow into the iommu_sva_handle_iopf() instead of the
> void *data.

Okay, but I still want to keep void *data as a private pointer of the
iopf consumer. For SVA, it's probably NULL.

> 
> The SVA code can then just use domain->mm directly.

Yes.

> 
> We need to document/figure out some how to ensure that the faults are
> all done processing before a fault enabled domain can be freed.

This has been documented in drivers/iommu/io-pgfault.c:

[...]
  * Any valid page fault will be eventually routed to an iommu domain and the
  * page fault handler installed there will get called. The users of this
  * handling framework should guarantee that the iommu domain could only be
  * freed after the device has stopped generating page faults (or the iommu
  * hardware has been set to block the page faults) and the pending page faults
  * have been flushed.
  *
  * Return: 0 on success and <0 on error.
  */
int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
[...]

> This patch would be better ordered before the prior patch.

Let me try this in the next version.

Best regards,
baolu
  
Jason Gunthorpe Aug. 11, 2023, 1:29 p.m. UTC | #3
On Fri, Aug 11, 2023 at 10:21:20AM +0800, Baolu Lu wrote:

> > This also has lifetime problems on the mm.
> > 
> > The domain should flow into the iommu_sva_handle_iopf() instead of the
> > void *data.
> 
> Okay, but I still want to keep void *data as a private pointer of the
> iopf consumer. For SVA, it's probably NULL.

I'd rather give the iommu_domain some 'private' void * than pass
around weird pointers all over the place... That might be broadly
useful, eg iommufd could store the hwpt in there.

> > We need to document/figure out some how to ensure that the faults are
> > all done processing before a fault enabled domain can be freed.
> 
> This has been documented in drivers/iommu/io-pgfault.c:
> 
> [...]
>  * Any valid page fault will be eventually routed to an iommu domain and the
>  * page fault handler installed there will get called. The users of this
>  * handling framework should guarantee that the iommu domain could only be
>  * freed after the device has stopped generating page faults (or the iommu
>  * hardware has been set to block the page faults) and the pending page
> faults
>  * have been flushed.
>  *
>  * Return: 0 on success and <0 on error.
>  */
> int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
> [...]
> 
> > This patch would be better ordered before the prior patch.
> 
> Let me try this in the next version.

Okay.. but can we have some debugging to enforce this maybe? Also add
a comment when we obtain the domain on this path to see the above
about the lifetime

Jason
  
Baolu Lu Aug. 12, 2023, 11:18 p.m. UTC | #4
On 2023/8/11 21:29, Jason Gunthorpe wrote:
> On Fri, Aug 11, 2023 at 10:21:20AM +0800, Baolu Lu wrote:
> 
>>> This also has lifetime problems on the mm.
>>>
>>> The domain should flow into the iommu_sva_handle_iopf() instead of the
>>> void *data.
>>
>> Okay, but I still want to keep void *data as a private pointer of the
>> iopf consumer. For SVA, it's probably NULL.
> 
> I'd rather give the iommu_domain some 'private' void * than pass
> around weird pointers all over the place... That might be broadly
> useful, eg iommufd could store the hwpt in there.

Yes, you are right. With the private pointer stored in domain and domain
is passed to the iopf handler, there will be no need for a @data
parameter for iopf handler.

> 
>>> We need to document/figure out some how to ensure that the faults are
>>> all done processing before a fault enabled domain can be freed.
>>
>> This has been documented in drivers/iommu/io-pgfault.c:
>>
>> [...]
>>   * Any valid page fault will be eventually routed to an iommu domain and the
>>   * page fault handler installed there will get called. The users of this
>>   * handling framework should guarantee that the iommu domain could only be
>>   * freed after the device has stopped generating page faults (or the iommu
>>   * hardware has been set to block the page faults) and the pending page
>> faults
>>   * have been flushed.
>>   *
>>   * Return: 0 on success and <0 on error.
>>   */
>> int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
>> [...]
>>
>>> This patch would be better ordered before the prior patch.
>>
>> Let me try this in the next version.
> 
> Okay.. but can we have some debugging to enforce this maybe? Also add
> a comment when we obtain the domain on this path to see the above
> about the lifetime

Yes. Sure.

Probably I will add a dev_warn() when get_domain for device or pasid
returns NULL...

In the paths of removing domain from device or pasid, I will add a check
for pending faults. Will trigger a dev_warn() if there are any pending
faults for the affected domain.

Best regards,
baolu
  

Patch

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 607740e548f2..1dafe031a56c 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -40,6 +40,7 @@  struct notifier_block;
 struct iommu_sva;
 struct iommu_fault_event;
 struct iommu_dma_cookie;
+struct iopf_group;
 
 #define IOMMU_FAULT_PERM_READ	(1 << 0) /* read */
 #define IOMMU_FAULT_PERM_WRITE	(1 << 1) /* write */
@@ -179,8 +180,7 @@  struct iommu_domain {
 	unsigned long pgsize_bitmap;	/* Bitmap of page sizes in use */
 	struct iommu_domain_geometry geometry;
 	struct iommu_dma_cookie *iova_cookie;
-	enum iommu_page_response_code (*iopf_handler)(struct iommu_fault *fault,
-						      void *data);
+	int (*iopf_handler)(struct iopf_group *group);
 	void *fault_data;
 	union {
 		struct {
@@ -513,6 +513,7 @@  struct iopf_group {
 	struct list_head		faults;
 	struct work_struct		work;
 	struct device			*dev;
+	void				*data;
 };
 
 int iommu_device_register(struct iommu_device *iommu,
diff --git a/drivers/iommu/iommu-sva.h b/drivers/iommu/iommu-sva.h
index 510a7df23fba..cf41e88fac17 100644
--- a/drivers/iommu/iommu-sva.h
+++ b/drivers/iommu/iommu-sva.h
@@ -22,8 +22,6 @@  int iopf_queue_flush_dev(struct device *dev);
 struct iopf_queue *iopf_queue_alloc(const char *name);
 void iopf_queue_free(struct iopf_queue *queue);
 int iopf_queue_discard_partial(struct iopf_queue *queue);
-enum iommu_page_response_code
-iommu_sva_handle_iopf(struct iommu_fault *fault, void *data);
 void iopf_free_group(struct iopf_group *group);
 int iopf_queue_work(struct iopf_group *group, work_func_t func);
 int iommu_sva_handle_iopf_group(struct iopf_group *group);
@@ -65,12 +63,6 @@  static inline int iopf_queue_discard_partial(struct iopf_queue *queue)
 	return -ENODEV;
 }
 
-static inline enum iommu_page_response_code
-iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)
-{
-	return IOMMU_PAGE_RESP_INVALID;
-}
-
 static inline void iopf_free_group(struct iopf_group *group)
 {
 }
diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index 3614a800638c..c5cfa3dfa77b 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -11,8 +11,6 @@ 
 #include <linux/slab.h>
 #include <linux/workqueue.h>
 
-#include "iommu-sva.h"
-
 /**
  * struct iopf_queue - IO Page Fault queue
  * @wq: the fault workqueue
@@ -106,6 +104,7 @@  int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev)
 {
 	int ret;
 	struct iopf_group *group;
+	struct iommu_domain *domain;
 	struct iopf_fault *iopf, *next;
 	struct iopf_device_param *iopf_param;
 	struct dev_iommu *param = dev->iommu;
@@ -137,6 +136,16 @@  int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev)
 		return 0;
 	}
 
+	if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID)
+		domain = iommu_get_domain_for_dev_pasid(dev, fault->prm.pasid, 0);
+	else
+		domain = iommu_get_domain_for_dev(dev);
+
+	if (!domain || !domain->iopf_handler) {
+		ret = -ENODEV;
+		goto cleanup_partial;
+	}
+
 	group = kzalloc(sizeof(*group), GFP_KERNEL);
 	if (!group) {
 		/*
@@ -150,6 +159,7 @@  int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev)
 
 	group->dev = dev;
 	group->last_fault.fault = *fault;
+	group->data = domain->fault_data;
 	INIT_LIST_HEAD(&group->faults);
 	list_add(&group->last_fault.list, &group->faults);
 
@@ -160,7 +170,7 @@  int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev)
 			list_move(&iopf->list, &group->faults);
 	}
 
-	ret = iommu_sva_handle_iopf_group(group);
+	ret = domain->iopf_handler(group);
 	if (ret)
 		iopf_free_group(group);
 
diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
index ab42cfdd7636..668f4c2bcf65 100644
--- a/drivers/iommu/iommu-sva.c
+++ b/drivers/iommu/iommu-sva.c
@@ -157,7 +157,7 @@  EXPORT_SYMBOL_GPL(iommu_sva_get_pasid);
 /*
  * I/O page fault handler for SVA
  */
-enum iommu_page_response_code
+static enum iommu_page_response_code
 iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)
 {
 	vm_fault_t ret;
@@ -241,23 +241,16 @@  static void iopf_handler(struct work_struct *work)
 {
 	struct iopf_fault *iopf;
 	struct iopf_group *group;
-	struct iommu_domain *domain;
 	enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;
 
 	group = container_of(work, struct iopf_group, work);
-	domain = iommu_get_domain_for_dev_pasid(group->dev,
-				group->last_fault.fault.prm.pasid, 0);
-	if (!domain || !domain->iopf_handler)
-		status = IOMMU_PAGE_RESP_INVALID;
-
 	list_for_each_entry(iopf, &group->faults, list) {
 		/*
 		 * For the moment, errors are sticky: don't handle subsequent
 		 * faults in the group if there is an error.
 		 */
 		if (status == IOMMU_PAGE_RESP_SUCCESS)
-			status = domain->iopf_handler(&iopf->fault,
-						      domain->fault_data);
+			status = iommu_sva_handle_iopf(&iopf->fault, group->data);
 	}
 
 	iopf_complete_group(group->dev, &group->last_fault, status);
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 157a28a49473..535a36e3edc9 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -3330,7 +3330,7 @@  struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
 	domain->type = IOMMU_DOMAIN_SVA;
 	mmgrab(mm);
 	domain->mm = mm;
-	domain->iopf_handler = iommu_sva_handle_iopf;
+	domain->iopf_handler = iommu_sva_handle_iopf_group;
 	domain->fault_data = mm;
 
 	return domain;