[v2,08/12] iommu: Prepare for separating SVA and IOPF

Message ID 20230727054837.147050-9-baolu.lu@linux.intel.com
State New
Headers
Series iommu: Prepare to deliver page faults to user space |

Commit Message

Baolu Lu July 27, 2023, 5:48 a.m. UTC
  Move iopf_group data structure to iommu.h. This is being done to make it
a minimal set of faults that a domain's page fault handler should handle.

Add two new helpers for the domain's page fault handler:
- iopf_free_group: free a fault group after all faults in the group are
  handled.
- iopf_queue_work: queue a given work item for a fault group.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
 include/linux/iommu.h      | 12 ++++++++++
 drivers/iommu/io-pgfault.c | 48 ++++++++++++++++++++++----------------
 2 files changed, 40 insertions(+), 20 deletions(-)
  

Comments

Tian, Kevin Aug. 3, 2023, 8:16 a.m. UTC | #1
> From: Lu Baolu <baolu.lu@linux.intel.com>
> Sent: Thursday, July 27, 2023 1:49 PM
>
> @@ -82,7 +82,7 @@ static void iopf_handler(struct work_struct *work)
>  	if (!domain || !domain->iopf_handler)
>  		status = IOMMU_PAGE_RESP_INVALID;
> 
> -	list_for_each_entry_safe(iopf, next, &group->faults, list) {
> +	list_for_each_entry(iopf, &group->faults, list) {
>  		/*
>  		 * For the moment, errors are sticky: don't handle
> subsequent
>  		 * faults in the group if there is an error.
> @@ -90,14 +90,20 @@ static void iopf_handler(struct work_struct *work)
>  		if (status == IOMMU_PAGE_RESP_SUCCESS)
>  			status = domain->iopf_handler(&iopf->fault,
>  						      domain->fault_data);
> -
> -		if (!(iopf->fault.prm.flags &
> -		      IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
> -			kfree(iopf);
>  	}
> 
>  	iopf_complete_group(group->dev, &group->last_fault, status);
> -	kfree(group);
> +	iopf_free_group(group);
> +}

this is perf-critical path. It's not good to traverse the list twice.

> +
> +static int iopf_queue_work(struct iopf_group *group, work_func_t func)
> +{
> +	struct iopf_device_param *iopf_param = group->dev->iommu->iopf_param;
> +
> +	INIT_WORK(&group->work, func);
> +	queue_work(iopf_param->queue->wq, &group->work);
> +
> +	return 0;
>  }

Is there plan to introduce further error in the future? otherwise this should
be void.

btw the work queue is only for sva. If there is no other caller this can be
just kept in iommu-sva.c. No need to create a helper.

> @@ -199,8 +204,11 @@ int iommu_queue_iopf(struct iommu_fault *fault,
> struct device *dev)
>  			list_move(&iopf->list, &group->faults);
>  	}
> 
> -	queue_work(iopf_param->queue->wq, &group->work);
> -	return 0;
> +	ret = iopf_queue_work(group, iopf_handler);
> +	if (ret)
> +		iopf_free_group(group);
> +
> +	return ret;
> 

Here we can document that the iopf handler (in patch10) should free the
group, allowing the optimization inside the handler.
  
Baolu Lu Aug. 4, 2023, 3:26 a.m. UTC | #2
On 2023/8/3 16:16, Tian, Kevin wrote:
>> From: Lu Baolu <baolu.lu@linux.intel.com>
>> Sent: Thursday, July 27, 2023 1:49 PM
>>
>> @@ -82,7 +82,7 @@ static void iopf_handler(struct work_struct *work)
>>   	if (!domain || !domain->iopf_handler)
>>   		status = IOMMU_PAGE_RESP_INVALID;
>>
>> -	list_for_each_entry_safe(iopf, next, &group->faults, list) {
>> +	list_for_each_entry(iopf, &group->faults, list) {
>>   		/*
>>   		 * For the moment, errors are sticky: don't handle
>> subsequent
>>   		 * faults in the group if there is an error.
>> @@ -90,14 +90,20 @@ static void iopf_handler(struct work_struct *work)
>>   		if (status == IOMMU_PAGE_RESP_SUCCESS)
>>   			status = domain->iopf_handler(&iopf->fault,
>>   						      domain->fault_data);
>> -
>> -		if (!(iopf->fault.prm.flags &
>> -		      IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
>> -			kfree(iopf);
>>   	}
>>
>>   	iopf_complete_group(group->dev, &group->last_fault, status);
>> -	kfree(group);
>> +	iopf_free_group(group);
>> +}
> 
> this is perf-critical path. It's not good to traverse the list twice.

Freeing the fault group is not critical anymore, right?

> 
>> +
>> +static int iopf_queue_work(struct iopf_group *group, work_func_t func)
>> +{
>> +	struct iopf_device_param *iopf_param = group->dev->iommu->iopf_param;
>> +
>> +	INIT_WORK(&group->work, func);
>> +	queue_work(iopf_param->queue->wq, &group->work);
>> +
>> +	return 0;
>>   }
> 
> Is there plan to introduce further error in the future? otherwise this should
> be void.

queue_work() returns true or false. I should check and return that value.

> 
> btw the work queue is only for sva. If there is no other caller this can be
> just kept in iommu-sva.c. No need to create a helper.

The definition of struct iopf_device_param is in this file. So I added a
helper to avoid making iopf_device_param visible globally.

> 
>> @@ -199,8 +204,11 @@ int iommu_queue_iopf(struct iommu_fault *fault,
>> struct device *dev)
>>   			list_move(&iopf->list, &group->faults);
>>   	}
>>
>> -	queue_work(iopf_param->queue->wq, &group->work);
>> -	return 0;
>> +	ret = iopf_queue_work(group, iopf_handler);
>> +	if (ret)
>> +		iopf_free_group(group);
>> +
>> +	return ret;
>>
> 
> Here we can document that the iopf handler (in patch10) should free the
> group, allowing the optimization inside the handler.

Yeah!

Best regards,
baolu
  
Jason Gunthorpe Aug. 8, 2023, 6:43 p.m. UTC | #3
On Thu, Aug 03, 2023 at 08:16:47AM +0000, Tian, Kevin wrote:

> Is there plan to introduce further error in the future? otherwise this should
> be void.
> 
> btw the work queue is only for sva. If there is no other caller this can be
> just kept in iommu-sva.c. No need to create a helper.

I think more than just SVA will need a work queue context to process
their faults.

Jason
  
Tian, Kevin Aug. 9, 2023, 12:02 a.m. UTC | #4
> From: Jason Gunthorpe <jgg@ziepe.ca>
> Sent: Wednesday, August 9, 2023 2:43 AM
> 
> On Thu, Aug 03, 2023 at 08:16:47AM +0000, Tian, Kevin wrote:
> 
> > Is there plan to introduce further error in the future? otherwise this should
> > be void.
> >
> > btw the work queue is only for sva. If there is no other caller this can be
> > just kept in iommu-sva.c. No need to create a helper.
> 
> I think more than just SVA will need a work queue context to process
> their faults.
> 

then this series needs more work. Currently the abstraction doesn't
include workqueue in the common fault reporting layer.
  
Baolu Lu Aug. 9, 2023, 10:40 a.m. UTC | #5
On 2023/8/9 8:02, Tian, Kevin wrote:
>> From: Jason Gunthorpe <jgg@ziepe.ca>
>> Sent: Wednesday, August 9, 2023 2:43 AM
>>
>> On Thu, Aug 03, 2023 at 08:16:47AM +0000, Tian, Kevin wrote:
>>
>>> Is there plan to introduce further error in the future? otherwise this should
>>> be void.
>>>
>>> btw the work queue is only for sva. If there is no other caller this can be
>>> just kept in iommu-sva.c. No need to create a helper.
>>
>> I think more than just SVA will need a work queue context to process
>> their faults.
>>
> 
> then this series needs more work. Currently the abstraction doesn't
> include workqueue in the common fault reporting layer.

Do you mind elaborating a bit here? workqueue is a basic infrastructure in
the fault handling framework, but it lets the consumers choose to use
it, or not to.

Best regards,
baolu
  
Tian, Kevin Aug. 10, 2023, 2:35 a.m. UTC | #6
> From: Baolu Lu <baolu.lu@linux.intel.com>
> Sent: Wednesday, August 9, 2023 6:41 PM
> 
> On 2023/8/9 8:02, Tian, Kevin wrote:
> >> From: Jason Gunthorpe <jgg@ziepe.ca>
> >> Sent: Wednesday, August 9, 2023 2:43 AM
> >>
> >> On Thu, Aug 03, 2023 at 08:16:47AM +0000, Tian, Kevin wrote:
> >>
> >>> Is there plan to introduce further error in the future? otherwise this
> should
> >>> be void.
> >>>
> >>> btw the work queue is only for sva. If there is no other caller this can be
> >>> just kept in iommu-sva.c. No need to create a helper.
> >>
> >> I think more than just SVA will need a work queue context to process
> >> their faults.
> >>
> >
> > then this series needs more work. Currently the abstraction doesn't
> > include workqueue in the common fault reporting layer.
> 
> Do you mind elaborate a bit here? workqueue is a basic infrastructure in
> the fault handling framework, but it lets the consumers choose to use
> it, or not to.
> 

My understanding of Jason's comment was to make the workqueue the
default path instead of being opted by the consumer. That is my 1st
impression but might be wrong...
  
Jason Gunthorpe Aug. 10, 2023, 4:47 p.m. UTC | #7
On Thu, Aug 10, 2023 at 02:35:40AM +0000, Tian, Kevin wrote:
> > From: Baolu Lu <baolu.lu@linux.intel.com>
> > Sent: Wednesday, August 9, 2023 6:41 PM
> > 
> > On 2023/8/9 8:02, Tian, Kevin wrote:
> > >> From: Jason Gunthorpe <jgg@ziepe.ca>
> > >> Sent: Wednesday, August 9, 2023 2:43 AM
> > >>
> > >> On Thu, Aug 03, 2023 at 08:16:47AM +0000, Tian, Kevin wrote:
> > >>
> > >>> Is there plan to introduce further error in the future? otherwise this
> > should
> > >>> be void.
> > >>>
> > >>> btw the work queue is only for sva. If there is no other caller this can be
> > >>> just kept in iommu-sva.c. No need to create a helper.
> > >>
> > >> I think more than just SVA will need a work queue context to process
> > >> their faults.
> > >>
> > >
> > > then this series needs more work. Currently the abstraction doesn't
> > > include workqueue in the common fault reporting layer.
> > 
> > Do you mind elaborate a bit here? workqueue is a basic infrastructure in
> > the fault handling framework, but it lets the consumers choose to use
> > it, or not to.
> > 
> 
> My understanding of Jason's comment was to make the workqueue the
> default path instead of being opted by the consumer.. that is my 1st
> impression but might be wrong...

Yeah, that is one path. Do we have anyone that uses this that doesn't
want the WQ? (actually who even uses this besides SVA?)

Jason
  
Baolu Lu Aug. 11, 2023, 1:53 a.m. UTC | #8
On 2023/8/11 0:47, Jason Gunthorpe wrote:
> On Thu, Aug 10, 2023 at 02:35:40AM +0000, Tian, Kevin wrote:
>>> From: Baolu Lu<baolu.lu@linux.intel.com>
>>> Sent: Wednesday, August 9, 2023 6:41 PM
>>>
>>> On 2023/8/9 8:02, Tian, Kevin wrote:
>>>>> From: Jason Gunthorpe<jgg@ziepe.ca>
>>>>> Sent: Wednesday, August 9, 2023 2:43 AM
>>>>>
>>>>> On Thu, Aug 03, 2023 at 08:16:47AM +0000, Tian, Kevin wrote:
>>>>>
>>>>>> Is there plan to introduce further error in the future? otherwise this
>>> should
>>>>>> be void.
>>>>>>
>>>>>> btw the work queue is only for sva. If there is no other caller this can be
>>>>>> just kept in iommu-sva.c. No need to create a helper.
>>>>> I think more than just SVA will need a work queue context to process
>>>>> their faults.
>>>>>
>>>> then this series needs more work. Currently the abstraction doesn't
>>>> include workqueue in the common fault reporting layer.
>>> Do you mind elaborate a bit here? workqueue is a basic infrastructure in
>>> the fault handling framework, but it lets the consumers choose to use
>>> it, or not to.
>>>
>> My understanding of Jason's comment was to make the workqueue the
>> default path instead of being opted by the consumer.. that is my 1st
>> impression but might be wrong...
> Yeah, that is one path. Do we have anyone that uses this that doesn't
> want the WQ? (actually who even uses this besides SVA?)

I am still confused. When we forward iopf's to user space through the
iommufd, we don't need to schedule a WQ, right? Or I misunderstood here?

Best regards,
baolu
  
Jason Gunthorpe Aug. 11, 2023, 1:27 p.m. UTC | #9
On Fri, Aug 11, 2023 at 09:53:41AM +0800, Baolu Lu wrote:
> On 2023/8/11 0:47, Jason Gunthorpe wrote:
> > On Thu, Aug 10, 2023 at 02:35:40AM +0000, Tian, Kevin wrote:
> > > > From: Baolu Lu<baolu.lu@linux.intel.com>
> > > > Sent: Wednesday, August 9, 2023 6:41 PM
> > > > 
> > > > On 2023/8/9 8:02, Tian, Kevin wrote:
> > > > > > From: Jason Gunthorpe<jgg@ziepe.ca>
> > > > > > Sent: Wednesday, August 9, 2023 2:43 AM
> > > > > > 
> > > > > > On Thu, Aug 03, 2023 at 08:16:47AM +0000, Tian, Kevin wrote:
> > > > > > 
> > > > > > > Is there plan to introduce further error in the future? otherwise this
> > > > should
> > > > > > > be void.
> > > > > > > 
> > > > > > > btw the work queue is only for sva. If there is no other caller this can be
> > > > > > > just kept in iommu-sva.c. No need to create a helper.
> > > > > > I think more than just SVA will need a work queue context to process
> > > > > > their faults.
> > > > > > 
> > > > > then this series needs more work. Currently the abstraction doesn't
> > > > > include workqueue in the common fault reporting layer.
> > > > Do you mind elaborate a bit here? workqueue is a basic infrastructure in
> > > > the fault handling framework, but it lets the consumers choose to use
> > > > it, or not to.
> > > > 
> > > My understanding of Jason's comment was to make the workqueue the
> > > default path instead of being opted by the consumer.. that is my 1st
> > > impression but might be wrong...
> > Yeah, that is one path. Do we have anyone that uses this that doesn't
> > want the WQ? (actually who even uses this besides SVA?)
> 
> I am still confused. When we forward iopf's to user space through the
> iommufd, we don't need to schedule a WQ, right? Or I misunderstood
> here?

Yes, that could be true, iommufd could just queue it from the
interrupt context and trigger a wakeup.

But other iommufd modes would want to invoke hmm_range_fault() which
would need the work queue.

Jason
  
Baolu Lu Aug. 13, 2023, 11:19 a.m. UTC | #10
On 2023/8/11 21:27, Jason Gunthorpe wrote:
> On Fri, Aug 11, 2023 at 09:53:41AM +0800, Baolu Lu wrote:
>> On 2023/8/11 0:47, Jason Gunthorpe wrote:
>>> On Thu, Aug 10, 2023 at 02:35:40AM +0000, Tian, Kevin wrote:
>>>>> From: Baolu Lu<baolu.lu@linux.intel.com>
>>>>> Sent: Wednesday, August 9, 2023 6:41 PM
>>>>>
>>>>> On 2023/8/9 8:02, Tian, Kevin wrote:
>>>>>>> From: Jason Gunthorpe<jgg@ziepe.ca>
>>>>>>> Sent: Wednesday, August 9, 2023 2:43 AM
>>>>>>>
>>>>>>> On Thu, Aug 03, 2023 at 08:16:47AM +0000, Tian, Kevin wrote:
>>>>>>>
>>>>>>>> Is there plan to introduce further error in the future? otherwise this
>>>>> should
>>>>>>>> be void.
>>>>>>>>
>>>>>>>> btw the work queue is only for sva. If there is no other caller this can be
>>>>>>>> just kept in iommu-sva.c. No need to create a helper.
>>>>>>> I think more than just SVA will need a work queue context to process
>>>>>>> their faults.
>>>>>>>
>>>>>> then this series needs more work. Currently the abstraction doesn't
>>>>>> include workqueue in the common fault reporting layer.
>>>>> Do you mind elaborate a bit here? workqueue is a basic infrastructure in
>>>>> the fault handling framework, but it lets the consumers choose to use
>>>>> it, or not to.
>>>>>
>>>> My understanding of Jason's comment was to make the workqueue the
>>>> default path instead of being opted by the consumer.. that is my 1st
>>>> impression but might be wrong...
>>> Yeah, that is one path. Do we have anyone that uses this that doesn't
>>> want the WQ? (actually who even uses this besides SVA?)
>> I am still confused. When we forward iopf's to user space through the
>> iommufd, we don't need to schedule a WQ, right? Or I misunderstood
>> here?
> Yes, that could be true, iommufd could just queue it from the
> interrupt context and trigger a wakeup.
> 
> But other iommufd modes would want to invoke hmm_range_fault() which
> would need the work queue.

Yes. That's the reason why I added below helper

int iopf_queue_work(struct iopf_group *group, work_func_t func)

in the patch 09/12.

Best regards,
baolu
  

Patch

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index cb12bab38365..607740e548f2 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -503,6 +503,18 @@  struct dev_iommu {
 	u32				pci_32bit_workaround:1;
 };
 
+struct iopf_fault {
+	struct iommu_fault		fault;
+	struct list_head		list;
+};
+
+struct iopf_group {
+	struct iopf_fault		last_fault;
+	struct list_head		faults;
+	struct work_struct		work;
+	struct device			*dev;
+};
+
 int iommu_device_register(struct iommu_device *iommu,
 			  const struct iommu_ops *ops,
 			  struct device *hwdev);
diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index 7e6697083f9d..1432751ff4d4 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -40,17 +40,17 @@  struct iopf_device_param {
 	struct list_head		partial;
 };
 
-struct iopf_fault {
-	struct iommu_fault		fault;
-	struct list_head		list;
-};
+static void iopf_free_group(struct iopf_group *group)
+{
+	struct iopf_fault *iopf, *next;
 
-struct iopf_group {
-	struct iopf_fault		last_fault;
-	struct list_head		faults;
-	struct work_struct		work;
-	struct device			*dev;
-};
+	list_for_each_entry_safe(iopf, next, &group->faults, list) {
+		if (!(iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
+			kfree(iopf);
+	}
+
+	kfree(group);
+}
 
 static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf,
 			       enum iommu_page_response_code status)
@@ -71,9 +71,9 @@  static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf,
 
 static void iopf_handler(struct work_struct *work)
 {
+	struct iopf_fault *iopf;
 	struct iopf_group *group;
 	struct iommu_domain *domain;
-	struct iopf_fault *iopf, *next;
 	enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;
 
 	group = container_of(work, struct iopf_group, work);
@@ -82,7 +82,7 @@  static void iopf_handler(struct work_struct *work)
 	if (!domain || !domain->iopf_handler)
 		status = IOMMU_PAGE_RESP_INVALID;
 
-	list_for_each_entry_safe(iopf, next, &group->faults, list) {
+	list_for_each_entry(iopf, &group->faults, list) {
 		/*
 		 * For the moment, errors are sticky: don't handle subsequent
 		 * faults in the group if there is an error.
@@ -90,14 +90,20 @@  static void iopf_handler(struct work_struct *work)
 		if (status == IOMMU_PAGE_RESP_SUCCESS)
 			status = domain->iopf_handler(&iopf->fault,
 						      domain->fault_data);
-
-		if (!(iopf->fault.prm.flags &
-		      IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
-			kfree(iopf);
 	}
 
 	iopf_complete_group(group->dev, &group->last_fault, status);
-	kfree(group);
+	iopf_free_group(group);
+}
+
+static int iopf_queue_work(struct iopf_group *group, work_func_t func)
+{
+	struct iopf_device_param *iopf_param = group->dev->iommu->iopf_param;
+
+	INIT_WORK(&group->work, func);
+	queue_work(iopf_param->queue->wq, &group->work);
+
+	return 0;
 }
 
 /**
@@ -190,7 +196,6 @@  int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev)
 	group->last_fault.fault = *fault;
 	INIT_LIST_HEAD(&group->faults);
 	list_add(&group->last_fault.list, &group->faults);
-	INIT_WORK(&group->work, iopf_handler);
 
 	/* See if we have partial faults for this group */
 	list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
@@ -199,8 +204,11 @@  int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev)
 			list_move(&iopf->list, &group->faults);
 	}
 
-	queue_work(iopf_param->queue->wq, &group->work);
-	return 0;
+	ret = iopf_queue_work(group, iopf_handler);
+	if (ret)
+		iopf_free_group(group);
+
+	return ret;
 
 cleanup_partial:
 	list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {