[v7,9/9] iommu/vt-d: Add iotlb flush for nested domain

Message ID 20231221153948.119007-10-yi.l.liu@intel.com
State New
Headers
Series Add iommufd nesting (part 2/2) |

Commit Message

Yi Liu Dec. 21, 2023, 3:39 p.m. UTC
  From: Lu Baolu <baolu.lu@linux.intel.com>

This implements the .cache_invalidate_user() callback to support iotlb
flush for nested domain.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Co-developed-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
---
 drivers/iommu/intel/nested.c | 116 +++++++++++++++++++++++++++++++++++
 1 file changed, 116 insertions(+)
  

Comments

Yang, Weijiang Dec. 22, 2023, 3:56 a.m. UTC | #1
On 12/21/2023 11:39 PM, Yi Liu wrote:
> From: Lu Baolu <baolu.lu@linux.intel.com>
>
> This implements the .cache_invalidate_user() callback to support iotlb
> flush for nested domain.
>
> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> Co-developed-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> ---
>   drivers/iommu/intel/nested.c | 116 +++++++++++++++++++++++++++++++++++
>   1 file changed, 116 insertions(+)
>
> diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
> index b5a5563ab32c..c665e2647045 100644
> --- a/drivers/iommu/intel/nested.c
> +++ b/drivers/iommu/intel/nested.c
> @@ -73,9 +73,125 @@ static void intel_nested_domain_free(struct iommu_domain *domain)
>   	kfree(to_dmar_domain(domain));
>   }
>   
> +static void nested_flush_pasid_iotlb(struct intel_iommu *iommu,
> +				     struct dmar_domain *domain, u64 addr,
> +				     unsigned long npages, bool ih)
> +{
> +	u16 did = domain_id_iommu(domain, iommu);
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&domain->lock, flags);
> +	if (!list_empty(&domain->devices))
> +		qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr,
> +				npages, ih, NULL);
> +	spin_unlock_irqrestore(&domain->lock, flags);
> +}
> +
> +static void nested_flush_dev_iotlb(struct dmar_domain *domain, u64 addr,
> +				   unsigned mask, u32 *fault)
> +{
> +	struct device_domain_info *info;
> +	unsigned long flags;
> +	u16 sid, qdep;
> +
> +	spin_lock_irqsave(&domain->lock, flags);
> +	list_for_each_entry(info, &domain->devices, link) {
> +		if (!info->ats_enabled)
> +			continue;
> +		sid = info->bus << 8 | info->devfn;
> +		qdep = info->ats_qdep;
> +		qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
> +				   qdep, addr, mask, fault);
> +		quirk_extra_dev_tlb_flush(info, addr, mask,
> +					  IOMMU_NO_PASID, qdep);
> +	}
> +	spin_unlock_irqrestore(&domain->lock, flags);
> +}
> +
> +static void intel_nested_flush_cache(struct dmar_domain *domain, u64 addr,
> +				     unsigned long npages, u32 *error)
> +{
> +	struct iommu_domain_info *info;
> +	unsigned long i;
> +	unsigned mask;
> +	u32 fault = 0;
> +
> +	if (npages == U64_MAX)
> +		mask = 64 - VTD_PAGE_SHIFT;
> +	else
> +		mask = ilog2(__roundup_pow_of_two(npages));
> +
> +	xa_for_each(&domain->iommu_array, i, info) {
> +		nested_flush_pasid_iotlb(info->iommu, domain, addr, npages, 0);
> +
> +		if (domain->has_iotlb_device)
> +			continue;

Shouldn't this be if (!domain->has_iotlb_device)?
> +
> +		nested_flush_dev_iotlb(domain, addr, mask, &fault);
> +		if (fault & (DMA_FSTS_ITE | DMA_FSTS_ICE))
> +			break;
> +	}
> +
> +	if (fault & DMA_FSTS_ICE)
> +		*error |= IOMMU_HWPT_INVALIDATE_VTD_S1_ICE;
> +	if (fault & DMA_FSTS_ITE)
> +		*error |= IOMMU_HWPT_INVALIDATE_VTD_S1_ITE;
> +}
> +
> +static int intel_nested_cache_invalidate_user(struct iommu_domain *domain,
> +					      struct iommu_user_data_array *array)
> +{
> +	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
> +	struct iommu_hwpt_vtd_s1_invalidate inv_entry;
> +	u32 processed = 0;
> +	int ret = 0;
> +	u32 index;
> +
> +	if (array->type != IOMMU_HWPT_INVALIDATE_DATA_VTD_S1) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	for (index = 0; index < array->entry_num; index++) {
> +		ret = iommu_copy_struct_from_user_array(&inv_entry, array,
> +							IOMMU_HWPT_INVALIDATE_DATA_VTD_S1,
> +							index, inv_error);
> +		if (ret)
> +			break;
> +
> +		if (inv_entry.flags & ~IOMMU_VTD_INV_FLAGS_LEAF) {
> +			ret = -EOPNOTSUPP;
> +			break;
> +		}
> +
> +		if (!IS_ALIGNED(inv_entry.addr, VTD_PAGE_SIZE) ||
> +		    ((inv_entry.npages == U64_MAX) && inv_entry.addr)) {
> +			ret = -EINVAL;
> +			break;
> +		}
> +
> +		inv_entry.inv_error = 0;
> +		intel_nested_flush_cache(dmar_domain, inv_entry.addr,
> +					 inv_entry.npages, &inv_entry.inv_error);
> +
> +		ret = iommu_respond_struct_to_user_array(array, index,
> +							 (void *)&inv_entry,
> +							 sizeof(inv_entry));
> +		if (ret)
> +			break;
> +
> +		processed++;
> +	}
> +
> +out:
> +	array->entry_num = processed;
> +	return ret;
> +}
> +
>   static const struct iommu_domain_ops intel_nested_domain_ops = {
>   	.attach_dev		= intel_nested_attach_dev,
>   	.free			= intel_nested_domain_free,
> +	.cache_invalidate_user	= intel_nested_cache_invalidate_user,
>   };
>   
>   struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent,
  
Tian, Kevin Dec. 22, 2023, 6:47 a.m. UTC | #2
> From: Yang, Weijiang <weijiang.yang@intel.com>
> Sent: Friday, December 22, 2023 11:56 AM
> > +
> > +	xa_for_each(&domain->iommu_array, i, info) {
> > +		nested_flush_pasid_iotlb(info->iommu, domain, addr,
> npages, 0);
> > +
> > +		if (domain->has_iotlb_device)
> > +			continue;
> 
> Shouldn't this be if (!domain->has_iotlb_device)?

yes that is wrong.

actually it's weird to put domain check in a loop of domain->iommu_array.

that check along with devtlb flush should be done out of that loop.
  
Tian, Kevin Dec. 22, 2023, 6:57 a.m. UTC | #3
> From: Liu, Yi L <yi.l.liu@intel.com>
> Sent: Thursday, December 21, 2023 11:40 PM
> 
> +
> +static void intel_nested_flush_cache(struct dmar_domain *domain, u64
> addr,
> +				     unsigned long npages, u32 *error)
> +{
> +	struct iommu_domain_info *info;
> +	unsigned long i;
> +	unsigned mask;
> +	u32 fault = 0;
> +
> +	if (npages == U64_MAX)
> +		mask = 64 - VTD_PAGE_SHIFT;
> +	else
> +		mask = ilog2(__roundup_pow_of_two(npages));
> +
> +	xa_for_each(&domain->iommu_array, i, info) {
> +		nested_flush_pasid_iotlb(info->iommu, domain, addr,
> npages, 0);

so IOMMU_VTD_INV_FLAGS_LEAF is defined but ignored?

> +
> +		if (domain->has_iotlb_device)
> +			continue;
> +
> +		nested_flush_dev_iotlb(domain, addr, mask, &fault);
> +		if (fault & (DMA_FSTS_ITE | DMA_FSTS_ICE))
> +			break;

here you may add a note that we don't plan to forward invalidation 
queue error (i.e. IQE) to the caller as it's caused only by driver
internal bug.

> +
> +		if (!IS_ALIGNED(inv_entry.addr, VTD_PAGE_SIZE) ||
> +		    ((inv_entry.npages == U64_MAX) && inv_entry.addr)) {
> +			ret = -EINVAL;
> +			break;
> +		}
> +

why is [non-zero-addr, U64_MAX] an error? Is it explicitly stated to
be not supported by underlying helpers?
  
Yi Liu Dec. 22, 2023, 7 a.m. UTC | #4
> On Dec 22, 2023, at 11:56, Yang, Weijiang <weijiang.yang@intel.com> wrote:
> 
> On 12/21/2023 11:39 PM, Yi Liu wrote:
>> From: Lu Baolu <baolu.lu@linux.intel.com>
>> 
>> This implements the .cache_invalidate_user() callback to support iotlb
>> flush for nested domain.
>> 
>> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
>> Co-developed-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> ---
>>  drivers/iommu/intel/nested.c | 116 +++++++++++++++++++++++++++++++++++
>>  1 file changed, 116 insertions(+)
>> 
>> diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
>> index b5a5563ab32c..c665e2647045 100644
>> --- a/drivers/iommu/intel/nested.c
>> +++ b/drivers/iommu/intel/nested.c
>> @@ -73,9 +73,125 @@ static void intel_nested_domain_free(struct iommu_domain *domain)
>>      kfree(to_dmar_domain(domain));
>>  }
>>  +static void nested_flush_pasid_iotlb(struct intel_iommu *iommu,
>> +                     struct dmar_domain *domain, u64 addr,
>> +                     unsigned long npages, bool ih)
>> +{
>> +    u16 did = domain_id_iommu(domain, iommu);
>> +    unsigned long flags;
>> +
>> +    spin_lock_irqsave(&domain->lock, flags);
>> +    if (!list_empty(&domain->devices))
>> +        qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr,
>> +                npages, ih, NULL);
>> +    spin_unlock_irqrestore(&domain->lock, flags);
>> +}
>> +
>> +static void nested_flush_dev_iotlb(struct dmar_domain *domain, u64 addr,
>> +                   unsigned mask, u32 *fault)
>> +{
>> +    struct device_domain_info *info;
>> +    unsigned long flags;
>> +    u16 sid, qdep;
>> +
>> +    spin_lock_irqsave(&domain->lock, flags);
>> +    list_for_each_entry(info, &domain->devices, link) {
>> +        if (!info->ats_enabled)
>> +            continue;
>> +        sid = info->bus << 8 | info->devfn;
>> +        qdep = info->ats_qdep;
>> +        qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
>> +                   qdep, addr, mask, fault);
>> +        quirk_extra_dev_tlb_flush(info, addr, mask,
>> +                      IOMMU_NO_PASID, qdep);
>> +    }
>> +    spin_unlock_irqrestore(&domain->lock, flags);
>> +}
>> +
>> +static void intel_nested_flush_cache(struct dmar_domain *domain, u64 addr,
>> +                     unsigned long npages, u32 *error)
>> +{
>> +    struct iommu_domain_info *info;
>> +    unsigned long i;
>> +    unsigned mask;
>> +    u32 fault = 0;
>> +
>> +    if (npages == U64_MAX)
>> +        mask = 64 - VTD_PAGE_SHIFT;
>> +    else
>> +        mask = ilog2(__roundup_pow_of_two(npages));
>> +
>> +    xa_for_each(&domain->iommu_array, i, info) {
>> +        nested_flush_pasid_iotlb(info->iommu, domain, addr, npages, 0);
>> +
>> +        if (domain->has_iotlb_device)
>> +            continue;
> 
> Shouldn't this be if (!domain->has_iotlb_device)?

oops, yes it is.

>> +
>> +        nested_flush_dev_iotlb(domain, addr, mask, &fault);
>> +        if (fault & (DMA_FSTS_ITE | DMA_FSTS_ICE))
>> +            break;
>> +    }
>> +
>> +    if (fault & DMA_FSTS_ICE)
>> +        *error |= IOMMU_HWPT_INVALIDATE_VTD_S1_ICE;
>> +    if (fault & DMA_FSTS_ITE)
>> +        *error |= IOMMU_HWPT_INVALIDATE_VTD_S1_ITE;
>> +}
>> +
>> +static int intel_nested_cache_invalidate_user(struct iommu_domain *domain,
>> +                          struct iommu_user_data_array *array)
>> +{
>> +    struct dmar_domain *dmar_domain = to_dmar_domain(domain);
>> +    struct iommu_hwpt_vtd_s1_invalidate inv_entry;
>> +    u32 processed = 0;
>> +    int ret = 0;
>> +    u32 index;
>> +
>> +    if (array->type != IOMMU_HWPT_INVALIDATE_DATA_VTD_S1) {
>> +        ret = -EINVAL;
>> +        goto out;
>> +    }
>> +
>> +    for (index = 0; index < array->entry_num; index++) {
>> +        ret = iommu_copy_struct_from_user_array(&inv_entry, array,
>> +                            IOMMU_HWPT_INVALIDATE_DATA_VTD_S1,
>> +                            index, inv_error);
>> +        if (ret)
>> +            break;
>> +
>> +        if (inv_entry.flags & ~IOMMU_VTD_INV_FLAGS_LEAF) {
>> +            ret = -EOPNOTSUPP;
>> +            break;
>> +        }
>> +
>> +        if (!IS_ALIGNED(inv_entry.addr, VTD_PAGE_SIZE) ||
>> +            ((inv_entry.npages == U64_MAX) && inv_entry.addr)) {
>> +            ret = -EINVAL;
>> +            break;
>> +        }
>> +
>> +        inv_entry.inv_error = 0;
>> +        intel_nested_flush_cache(dmar_domain, inv_entry.addr,
>> +                     inv_entry.npages, &inv_entry.inv_error);
>> +
>> +        ret = iommu_respond_struct_to_user_array(array, index,
>> +                             (void *)&inv_entry,
>> +                             sizeof(inv_entry));
>> +        if (ret)
>> +            break;
>> +
>> +        processed++;
>> +    }
>> +
>> +out:
>> +    array->entry_num = processed;
>> +    return ret;
>> +}
>> +
>>  static const struct iommu_domain_ops intel_nested_domain_ops = {
>>      .attach_dev        = intel_nested_attach_dev,
>>      .free            = intel_nested_domain_free,
>> +    .cache_invalidate_user    = intel_nested_cache_invalidate_user,
>>  };
>>    struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent,
>
  
Yi Liu Dec. 22, 2023, 7:01 a.m. UTC | #5
> On Dec 22, 2023, at 14:47, Tian, Kevin <kevin.tian@intel.com> wrote:
> 
> 
>> 
>> From: Yang, Weijiang <weijiang.yang@intel.com>
>> Sent: Friday, December 22, 2023 11:56 AM
>>> +
>>> +    xa_for_each(&domain->iommu_array, i, info) {
>>> +        nested_flush_pasid_iotlb(info->iommu, domain, addr,
>> npages, 0);
>>> +
>>> +        if (domain->has_iotlb_device)
>>> +            continue;
>> 
>> Shouldn't this be if (!domain->has_iotlb_device)?
> 
> yes that is wrong.
> 
> actually it's weird to put domain check in a loop of domain->iommu_array.
> 
> that check along with devtlb flush should be done out of that loop.

Maybe adding a bool, set it out of the loop, check the bool in the loop.
  
Tian, Kevin Dec. 22, 2023, 7:12 a.m. UTC | #6
> From: Liu, Yi L <yi.l.liu@intel.com>
> Sent: Friday, December 22, 2023 3:02 PM
> 
> 
> > On Dec 22, 2023, at 14:47, Tian, Kevin <kevin.tian@intel.com> wrote:
> >
> > 
> >>
> >> From: Yang, Weijiang <weijiang.yang@intel.com>
> >> Sent: Friday, December 22, 2023 11:56 AM
> >>> +
> >>> +    xa_for_each(&domain->iommu_array, i, info) {
> >>> +        nested_flush_pasid_iotlb(info->iommu, domain, addr,
> >> npages, 0);
> >>> +
> >>> +        if (domain->has_iotlb_device)
> >>> +            continue;
> >>
> >> Shouldn't this be if (!domain->has_iotlb_device)?
> >
> > yes that is wrong.
> >
> > actually it's weird to put domain check in a loop of domain->iommu_array.
> >
> > that check along with devtlb flush should be done out of that loop.
> 
> Maybe adding a bool, set it out of the loop, check the bool in the loop.

the point is that dev iotlb doesn't rely on info->iommu:

	nested_flush_dev_iotlb(domain, addr, mask, &fault);

then why do it in the loop of info->iommu?
  
Yi Liu Dec. 22, 2023, 11:59 a.m. UTC | #7
> On Dec 22, 2023, at 15:12, Tian, Kevin <kevin.tian@intel.com> wrote:
> 
> 
>> 
>> From: Liu, Yi L <yi.l.liu@intel.com>
>> Sent: Friday, December 22, 2023 3:02 PM
>> 
>> 
>>>> On Dec 22, 2023, at 14:47, Tian, Kevin <kevin.tian@intel.com> wrote:
>>> 
>>> 
>>>> 
>>>> From: Yang, Weijiang <weijiang.yang@intel.com>
>>>> Sent: Friday, December 22, 2023 11:56 AM
>>>>> +
>>>>> +    xa_for_each(&domain->iommu_array, i, info) {
>>>>> +        nested_flush_pasid_iotlb(info->iommu, domain, addr,
>>>> npages, 0);
>>>>> +
>>>>> +        if (domain->has_iotlb_device)
>>>>> +            continue;
>>>> 
>>>> Shouldn't this be if (!domain->has_iotlb_device)?
>>> 
>>> yes that is wrong.
>>> 
>>> actually it's weird to put domain check in a loop of domain->iommu_array.
>>> 
>>> that check along with devtlb flush should be done out of that loop.
>> 
>> Maybe adding a bool, set it out of the loop, check the bool in the loop.
> 
> the point is that dev iotlb doesn't rely on info->iommu:
> 
>    nested_flush_dev_iotlb(domain, addr, mask, &fault);
> 
> then why do it in the loop of info->iommu?

yes. It should have another device loop instead.
  
Yi Liu Dec. 26, 2023, 4:51 a.m. UTC | #8
On 2023/12/22 14:57, Tian, Kevin wrote:
>> From: Liu, Yi L <yi.l.liu@intel.com>
>> Sent: Thursday, December 21, 2023 11:40 PM
>>
>> +
>> +static void intel_nested_flush_cache(struct dmar_domain *domain, u64
>> addr,
>> +				     unsigned long npages, u32 *error)
>> +{
>> +	struct iommu_domain_info *info;
>> +	unsigned long i;
>> +	unsigned mask;
>> +	u32 fault = 0;
>> +
>> +	if (npages == U64_MAX)
>> +		mask = 64 - VTD_PAGE_SHIFT;
>> +	else
>> +		mask = ilog2(__roundup_pow_of_two(npages));
>> +
>> +	xa_for_each(&domain->iommu_array, i, info) {
>> +		nested_flush_pasid_iotlb(info->iommu, domain, addr,
>> npages, 0);
> 
> so IOMMU_VTD_INV_FLAGS_LEAF is defined but ignored?

Yeah, it is. It is named ih in the driver code, but it appears that only
the code below ever sets it: when calling iommu_flush_iotlb_psi(), the 5th
parameter (ih) may be true.

static int intel_iommu_memory_notifier(struct notifier_block *nb,
				       unsigned long val, void *v)
{
	struct memory_notify *mhp = v;
	unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
	unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn +
			mhp->nr_pages - 1);

	switch (val) {
	case MEM_GOING_ONLINE:
		if (iommu_domain_identity_map(si_domain,
					      start_vpfn, last_vpfn)) {
			pr_warn("Failed to build identity map for [%lx-%lx]\n",
				start_vpfn, last_vpfn);
			return NOTIFY_BAD;
		}
		break;

	case MEM_OFFLINE:
	case MEM_CANCEL_ONLINE:
		{
			struct dmar_drhd_unit *drhd;
			struct intel_iommu *iommu;
			LIST_HEAD(freelist);

			domain_unmap(si_domain, start_vpfn, last_vpfn, &freelist);

			rcu_read_lock();
			for_each_active_iommu(iommu, drhd)
				iommu_flush_iotlb_psi(iommu, si_domain,
					start_vpfn, mhp->nr_pages,
					list_empty(&freelist), 0);
			rcu_read_unlock();
			put_pages_list(&freelist);
		}
		break;
	}

	return NOTIFY_OK;
}

> 
>> +
>> +		if (domain->has_iotlb_device)
>> +			continue;
>> +
>> +		nested_flush_dev_iotlb(domain, addr, mask, &fault);
>> +		if (fault & (DMA_FSTS_ITE | DMA_FSTS_ICE))
>> +			break;
> 
> here you may add a note that we don't plan to forward invalidation
> queue error (i.e. IQE) to the caller as it's caused only by driver
> internal bug.

yes.

> 
>> +
>> +		if (!IS_ALIGNED(inv_entry.addr, VTD_PAGE_SIZE) ||
>> +		    ((inv_entry.npages == U64_MAX) && inv_entry.addr)) {
>> +			ret = -EINVAL;
>> +			break;
>> +		}
>> +
> 
> why is [non-zero-addr, U64_MAX] an error? Is it explicitly stated to
> be not supported by underlying helpers?

There is no such limitation in the underlying helpers. But in that case
addr + npages * PAGE_SIZE would exceed U64_MAX, which seems a bit
strange. I'm fine with relaxing the check, though, since the underlying
helper only looks at npages when deciding whether to be PASID-selective.
  
Tian, Kevin Dec. 26, 2023, 6:11 a.m. UTC | #9
> From: Liu, Yi L <yi.l.liu@intel.com>
> Sent: Tuesday, December 26, 2023 12:52 PM
> >> +
> >> +		if (!IS_ALIGNED(inv_entry.addr, VTD_PAGE_SIZE) ||
> >> +		    ((inv_entry.npages == U64_MAX) && inv_entry.addr)) {
> >> +			ret = -EINVAL;
> >> +			break;
> >> +		}
> >> +
> >
> > why is [non-zero-addr, U64_MAX] an error? Is it explicitly stated to
> > be not supported by underlying helpers?
> 
> no such limitation by underlying helpers. But in such case, the
> addr+npages*PAGE_SIZE would exceed U64_MAX, this seems a bit
> strange. But I'm fine to relax the check since the underlying helper
> only checks npages when determining PASID-selective or not.
> 

I overlooked that npages determines the end of the range. Let's keep the check.
  
Yi Liu Dec. 26, 2023, 8:46 a.m. UTC | #10
On 2023/12/22 19:59, Liu, Yi L wrote:
> 
>> On Dec 22, 2023, at 15:12, Tian, Kevin <kevin.tian@intel.com> wrote:
>>
>> 
>>>
>>> From: Liu, Yi L <yi.l.liu@intel.com>
>>> Sent: Friday, December 22, 2023 3:02 PM
>>>
>>>
>>>>> On Dec 22, 2023, at 14:47, Tian, Kevin <kevin.tian@intel.com> wrote:
>>>>
>>>>
>>>>>
>>>>> From: Yang, Weijiang <weijiang.yang@intel.com>
>>>>> Sent: Friday, December 22, 2023 11:56 AM
>>>>>> +
>>>>>> +    xa_for_each(&domain->iommu_array, i, info) {
>>>>>> +        nested_flush_pasid_iotlb(info->iommu, domain, addr,
>>>>> npages, 0);
>>>>>> +
>>>>>> +        if (domain->has_iotlb_device)
>>>>>> +            continue;
>>>>>
>>>>> Shouldn't this be if (!domain->has_iotlb_device)?
>>>>
>>>> yes that is wrong.
>>>>
>>>> actually it's weird to put domain check in a loop of domain->iommu_array.
>>>>
>>>> that check along with devtlb flush should be done out of that loop.
>>>
>>> Maybe adding a bool, set it out of the loop, check the bool in the loop.
>>
>> the point is that dev iotlb doesn't rely on info->iommu:
>>
>>     nested_flush_dev_iotlb(domain, addr, mask, &fault);
>>
>> then why do it in the loop of info->iommu?
> 
> yes. It should have another device loop instead.

let me move the device tlb related code out of the info->iommu loop.
  
Yi Liu Dec. 26, 2023, 12:35 p.m. UTC | #11
On 2023/12/26 12:51, Yi Liu wrote:
> On 2023/12/22 14:57, Tian, Kevin wrote:
>>> From: Liu, Yi L <yi.l.liu@intel.com>
>>> Sent: Thursday, December 21, 2023 11:40 PM
>>>
>>> +
>>> +static void intel_nested_flush_cache(struct dmar_domain *domain, u64
>>> addr,
>>> +                     unsigned long npages, u32 *error)
>>> +{
>>> +    struct iommu_domain_info *info;
>>> +    unsigned long i;
>>> +    unsigned mask;
>>> +    u32 fault = 0;
>>> +
>>> +    if (npages == U64_MAX)
>>> +        mask = 64 - VTD_PAGE_SHIFT;
>>> +    else
>>> +        mask = ilog2(__roundup_pow_of_two(npages));
>>> +
>>> +    xa_for_each(&domain->iommu_array, i, info) {
>>> +        nested_flush_pasid_iotlb(info->iommu, domain, addr,
>>> npages, 0);
>>
>> so IOMMU_VTD_INV_FLAGS_LEAF is defined but ignored?
> 
> yeah... it is. It is named as ih in the driver code. But it appears only
> the below code is set ih. When calling iommu_flush_iotlb_psi(), the 5th
> parameter (ih) may be true.
> 
> static int intel_iommu_memory_notifier(struct notifier_block *nb,
>                         unsigned long val, void *v)
> {
>      struct memory_notify *mhp = v;
>      unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
>      unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn +
>              mhp->nr_pages - 1);
> 
>      switch (val) {
>      case MEM_GOING_ONLINE:
>          if (iommu_domain_identity_map(si_domain,
>                            start_vpfn, last_vpfn)) {
>              pr_warn("Failed to build identity map for [%lx-%lx]\n",
>                  start_vpfn, last_vpfn);
>              return NOTIFY_BAD;
>          }
>          break;
> 
>      case MEM_OFFLINE:
>      case MEM_CANCEL_ONLINE:
>          {
>              struct dmar_drhd_unit *drhd;
>              struct intel_iommu *iommu;
>              LIST_HEAD(freelist);
> 
>              domain_unmap(si_domain, start_vpfn, last_vpfn, &freelist);
> 
>              rcu_read_lock();
>              for_each_active_iommu(iommu, drhd)
>                  iommu_flush_iotlb_psi(iommu, si_domain,
>                      start_vpfn, mhp->nr_pages,
>                      list_empty(&freelist), 0);
>              rcu_read_unlock();
>              put_pages_list(&freelist);
>          }
>          break;
>      }
> 
>      return NOTIFY_OK;
> }

I now pass this flag to intel_nested_flush_cache(), since the underlying
helper accepts an ih parameter.
  
Duan, Zhenzhong Dec. 27, 2023, 9:27 a.m. UTC | #12
>-----Original Message-----
>From: Liu, Yi L <yi.l.liu@intel.com>
>Subject: [PATCH v7 9/9] iommu/vt-d: Add iotlb flush for nested domain
>
>From: Lu Baolu <baolu.lu@linux.intel.com>
>
>This implements the .cache_invalidate_user() callback to support iotlb
>flush for nested domain.
>
>Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
>Co-developed-by: Yi Liu <yi.l.liu@intel.com>
>Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>---
> drivers/iommu/intel/nested.c | 116
>+++++++++++++++++++++++++++++++++++
> 1 file changed, 116 insertions(+)
>
>diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
>index b5a5563ab32c..c665e2647045 100644
>--- a/drivers/iommu/intel/nested.c
>+++ b/drivers/iommu/intel/nested.c
>@@ -73,9 +73,125 @@ static void intel_nested_domain_free(struct
>iommu_domain *domain)
> 	kfree(to_dmar_domain(domain));
> }
>
>+static void nested_flush_pasid_iotlb(struct intel_iommu *iommu,
>+				     struct dmar_domain *domain, u64 addr,
>+				     unsigned long npages, bool ih)
>+{
>+	u16 did = domain_id_iommu(domain, iommu);
>+	unsigned long flags;
>+
>+	spin_lock_irqsave(&domain->lock, flags);
>+	if (!list_empty(&domain->devices))
>+		qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr,
>+				npages, ih, NULL);

Is it optimal to check whether the domain is attached to the iommu before
triggering the flush? Or is the check redundant if intel_nested_flush_cache()
is the only call site?

Thanks
Zhenzhong

>+	spin_unlock_irqrestore(&domain->lock, flags);
>+}
>+
>+static void nested_flush_dev_iotlb(struct dmar_domain *domain, u64 addr,
>+				   unsigned mask, u32 *fault)
>+{
>+	struct device_domain_info *info;
>+	unsigned long flags;
>+	u16 sid, qdep;
>+
>+	spin_lock_irqsave(&domain->lock, flags);
>+	list_for_each_entry(info, &domain->devices, link) {
>+		if (!info->ats_enabled)
>+			continue;
>+		sid = info->bus << 8 | info->devfn;
>+		qdep = info->ats_qdep;
>+		qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
>+				   qdep, addr, mask, fault);
>+		quirk_extra_dev_tlb_flush(info, addr, mask,
>+					  IOMMU_NO_PASID, qdep);
>+	}
>+	spin_unlock_irqrestore(&domain->lock, flags);
>+}
>+
>+static void intel_nested_flush_cache(struct dmar_domain *domain, u64
>addr,
>+				     unsigned long npages, u32 *error)
>+{
>+	struct iommu_domain_info *info;
>+	unsigned long i;
>+	unsigned mask;
>+	u32 fault = 0;
>+
>+	if (npages == U64_MAX)
>+		mask = 64 - VTD_PAGE_SHIFT;
>+	else
>+		mask = ilog2(__roundup_pow_of_two(npages));
>+
>+	xa_for_each(&domain->iommu_array, i, info) {
>+		nested_flush_pasid_iotlb(info->iommu, domain, addr,
>npages, 0);
>+
>+		if (domain->has_iotlb_device)
>+			continue;
>+
>+		nested_flush_dev_iotlb(domain, addr, mask, &fault);
>+		if (fault & (DMA_FSTS_ITE | DMA_FSTS_ICE))
>+			break;
>+	}
>+
>+	if (fault & DMA_FSTS_ICE)
>+		*error |= IOMMU_HWPT_INVALIDATE_VTD_S1_ICE;
>+	if (fault & DMA_FSTS_ITE)
>+		*error |= IOMMU_HWPT_INVALIDATE_VTD_S1_ITE;
>+}
>+
>+static int intel_nested_cache_invalidate_user(struct iommu_domain
>*domain,
>+					      struct iommu_user_data_array
>*array)
>+{
>+	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
>+	struct iommu_hwpt_vtd_s1_invalidate inv_entry;
>+	u32 processed = 0;
>+	int ret = 0;
>+	u32 index;
>+
>+	if (array->type != IOMMU_HWPT_INVALIDATE_DATA_VTD_S1) {
>+		ret = -EINVAL;
>+		goto out;
>+	}
>+
>+	for (index = 0; index < array->entry_num; index++) {
>+		ret = iommu_copy_struct_from_user_array(&inv_entry,
>array,
>+
>	IOMMU_HWPT_INVALIDATE_DATA_VTD_S1,
>+							index, inv_error);
>+		if (ret)
>+			break;
>+
>+		if (inv_entry.flags & ~IOMMU_VTD_INV_FLAGS_LEAF) {
>+			ret = -EOPNOTSUPP;
>+			break;
>+		}
>+
>+		if (!IS_ALIGNED(inv_entry.addr, VTD_PAGE_SIZE) ||
>+		    ((inv_entry.npages == U64_MAX) && inv_entry.addr)) {
>+			ret = -EINVAL;
>+			break;
>+		}
>+
>+		inv_entry.inv_error = 0;
>+		intel_nested_flush_cache(dmar_domain, inv_entry.addr,
>+					 inv_entry.npages,
>&inv_entry.inv_error);
>+
>+		ret = iommu_respond_struct_to_user_array(array, index,
>+							 (void *)&inv_entry,
>+							 sizeof(inv_entry));
>+		if (ret)
>+			break;
>+
>+		processed++;
>+	}
>+
>+out:
>+	array->entry_num = processed;
>+	return ret;
>+}
>+
> static const struct iommu_domain_ops intel_nested_domain_ops = {
> 	.attach_dev		= intel_nested_attach_dev,
> 	.free			= intel_nested_domain_free,
>+	.cache_invalidate_user	= intel_nested_cache_invalidate_user,
> };
>
> struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain
>*parent,
>--
>2.34.1
  
Yi Liu Dec. 27, 2023, 2:14 p.m. UTC | #13
On 2023/12/27 17:27, Duan, Zhenzhong wrote:
> 
> 
>> -----Original Message-----
>> From: Liu, Yi L <yi.l.liu@intel.com>
>> Subject: [PATCH v7 9/9] iommu/vt-d: Add iotlb flush for nested domain
>>
>> From: Lu Baolu <baolu.lu@linux.intel.com>
>>
>> This implements the .cache_invalidate_user() callback to support iotlb
>> flush for nested domain.
>>
>> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
>> Co-developed-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> ---
>> drivers/iommu/intel/nested.c | 116
>> +++++++++++++++++++++++++++++++++++
>> 1 file changed, 116 insertions(+)
>>
>> diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
>> index b5a5563ab32c..c665e2647045 100644
>> --- a/drivers/iommu/intel/nested.c
>> +++ b/drivers/iommu/intel/nested.c
>> @@ -73,9 +73,125 @@ static void intel_nested_domain_free(struct
>> iommu_domain *domain)
>> 	kfree(to_dmar_domain(domain));
>> }
>>
>> +static void nested_flush_pasid_iotlb(struct intel_iommu *iommu,
>> +				     struct dmar_domain *domain, u64 addr,
>> +				     unsigned long npages, bool ih)
>> +{
>> +	u16 did = domain_id_iommu(domain, iommu);
>> +	unsigned long flags;
>> +
>> +	spin_lock_irqsave(&domain->lock, flags);
>> +	if (!list_empty(&domain->devices))
>> +		qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr,
>> +				npages, ih, NULL);
> 
> Is it optimal to check if domain attached to iommu before trigger flush?
> Or the check is redundant if intel_nested_flush_cache() is the only call site.

I think it is possible for userspace to issue an invalidation on a hwpt which
does not have any device attached, though that would be a pointless thing to
do. So checking whether any device is attached before flushing still makes sense.

> Thanks
> Zhenzhong
> 
>> +	spin_unlock_irqrestore(&domain->lock, flags);
>> +}
>> +
>> +static void nested_flush_dev_iotlb(struct dmar_domain *domain, u64 addr,
>> +				   unsigned mask, u32 *fault)
>> +{
>> +	struct device_domain_info *info;
>> +	unsigned long flags;
>> +	u16 sid, qdep;
>> +
>> +	spin_lock_irqsave(&domain->lock, flags);
>> +	list_for_each_entry(info, &domain->devices, link) {
>> +		if (!info->ats_enabled)
>> +			continue;
>> +		sid = info->bus << 8 | info->devfn;
>> +		qdep = info->ats_qdep;
>> +		qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
>> +				   qdep, addr, mask, fault);
>> +		quirk_extra_dev_tlb_flush(info, addr, mask,
>> +					  IOMMU_NO_PASID, qdep);
>> +	}
>> +	spin_unlock_irqrestore(&domain->lock, flags);
>> +}
>> +
>> +static void intel_nested_flush_cache(struct dmar_domain *domain, u64
>> addr,
>> +				     unsigned long npages, u32 *error)
>> +{
>> +	struct iommu_domain_info *info;
>> +	unsigned long i;
>> +	unsigned mask;
>> +	u32 fault = 0;
>> +
>> +	if (npages == U64_MAX)
>> +		mask = 64 - VTD_PAGE_SHIFT;
>> +	else
>> +		mask = ilog2(__roundup_pow_of_two(npages));
>> +
>> +	xa_for_each(&domain->iommu_array, i, info) {
>> +		nested_flush_pasid_iotlb(info->iommu, domain, addr,
>> npages, 0);
>> +
>> +		if (domain->has_iotlb_device)
>> +			continue;
>> +
>> +		nested_flush_dev_iotlb(domain, addr, mask, &fault);
>> +		if (fault & (DMA_FSTS_ITE | DMA_FSTS_ICE))
>> +			break;
>> +	}
>> +
>> +	if (fault & DMA_FSTS_ICE)
>> +		*error |= IOMMU_HWPT_INVALIDATE_VTD_S1_ICE;
>> +	if (fault & DMA_FSTS_ITE)
>> +		*error |= IOMMU_HWPT_INVALIDATE_VTD_S1_ITE;
>> +}
>> +
>> +static int intel_nested_cache_invalidate_user(struct iommu_domain
>> *domain,
>> +					      struct iommu_user_data_array
>> *array)
>> +{
>> +	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
>> +	struct iommu_hwpt_vtd_s1_invalidate inv_entry;
>> +	u32 processed = 0;
>> +	int ret = 0;
>> +	u32 index;
>> +
>> +	if (array->type != IOMMU_HWPT_INVALIDATE_DATA_VTD_S1) {
>> +		ret = -EINVAL;
>> +		goto out;
>> +	}
>> +
>> +	for (index = 0; index < array->entry_num; index++) {
>> +		ret = iommu_copy_struct_from_user_array(&inv_entry,
>> array,
>> +
>> 	IOMMU_HWPT_INVALIDATE_DATA_VTD_S1,
>> +							index, inv_error);
>> +		if (ret)
>> +			break;
>> +
>> +		if (inv_entry.flags & ~IOMMU_VTD_INV_FLAGS_LEAF) {
>> +			ret = -EOPNOTSUPP;
>> +			break;
>> +		}
>> +
>> +		if (!IS_ALIGNED(inv_entry.addr, VTD_PAGE_SIZE) ||
>> +		    ((inv_entry.npages == U64_MAX) && inv_entry.addr)) {
>> +			ret = -EINVAL;
>> +			break;
>> +		}
>> +
>> +		inv_entry.inv_error = 0;
>> +		intel_nested_flush_cache(dmar_domain, inv_entry.addr,
>> +					 inv_entry.npages,
>> &inv_entry.inv_error);
>> +
>> +		ret = iommu_respond_struct_to_user_array(array, index,
>> +							 (void *)&inv_entry,
>> +							 sizeof(inv_entry));
>> +		if (ret)
>> +			break;
>> +
>> +		processed++;
>> +	}
>> +
>> +out:
>> +	array->entry_num = processed;
>> +	return ret;
>> +}
>> +
>> static const struct iommu_domain_ops intel_nested_domain_ops = {
>> 	.attach_dev		= intel_nested_attach_dev,
>> 	.free			= intel_nested_domain_free,
>> +	.cache_invalidate_user	= intel_nested_cache_invalidate_user,
>> };
>>
>> struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain
>> *parent,
>> --
>> 2.34.1
>
  

Patch

diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
index b5a5563ab32c..c665e2647045 100644
--- a/drivers/iommu/intel/nested.c
+++ b/drivers/iommu/intel/nested.c
@@ -73,9 +73,156 @@  static void intel_nested_domain_free(struct iommu_domain *domain)
 	kfree(to_dmar_domain(domain));
 }
 
+/*
+ * Queue a qi_flush_piotlb() on @iommu for @domain's domain ID, using
+ * IOMMU_NO_PASID, for @npages pages starting at @addr. @ih is passed
+ * through to the helper unchanged. Nothing is queued when no device is
+ * attached to @domain.
+ */
+static void nested_flush_pasid_iotlb(struct intel_iommu *iommu,
+				     struct dmar_domain *domain, u64 addr,
+				     unsigned long npages, bool ih)
+{
+	u16 did = domain_id_iommu(domain, iommu);
+	unsigned long flags;
+
+	spin_lock_irqsave(&domain->lock, flags);
+	if (!list_empty(&domain->devices))
+		qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr,
+				npages, ih, NULL);
+	spin_unlock_irqrestore(&domain->lock, flags);
+}
+
+/*
+ * Issue a device-TLB invalidation for every ATS-enabled device attached to
+ * @domain. @fault is handed to qi_flush_dev_iotlb() so that the caller can
+ * inspect the reported fault status afterwards.
+ */
+static void nested_flush_dev_iotlb(struct dmar_domain *domain, u64 addr,
+				   unsigned mask, u32 *fault)
+{
+	struct device_domain_info *info;
+	unsigned long flags;
+	u16 sid, qdep;
+
+	spin_lock_irqsave(&domain->lock, flags);
+	list_for_each_entry(info, &domain->devices, link) {
+		if (!info->ats_enabled)
+			continue;
+		sid = info->bus << 8 | info->devfn;
+		qdep = info->ats_qdep;
+		qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
+				   qdep, addr, mask, fault);
+		quirk_extra_dev_tlb_flush(info, addr, mask,
+					  IOMMU_NO_PASID, qdep);
+	}
+	spin_unlock_irqrestore(&domain->lock, flags);
+}
+
+/*
+ * Handle one invalidation request for a nested domain: flush the IOTLB on
+ * every IOMMU in @domain->iommu_array, then the device TLBs if the domain
+ * has any. ICE/ITE fault bits are translated into
+ * IOMMU_HWPT_INVALIDATE_VTD_S1_* bits and OR-ed into @error.
+ */
+static void intel_nested_flush_cache(struct dmar_domain *domain, u64 addr,
+				     unsigned long npages, bool ih, u32 *error)
+{
+	struct iommu_domain_info *info;
+	unsigned long i;
+	unsigned mask;
+	u32 fault = 0;
+
+	xa_for_each(&domain->iommu_array, i, info)
+		nested_flush_pasid_iotlb(info->iommu, domain, addr, npages, ih);
+
+	/*
+	 * The device TLB flush walks domain->devices and does not depend on
+	 * info->iommu, so do it once here instead of once per IOMMU.
+	 */
+	if (!domain->has_iotlb_device)
+		return;
+
+	if (npages == U64_MAX)
+		mask = 64 - VTD_PAGE_SHIFT;
+	else
+		mask = ilog2(__roundup_pow_of_two(npages));
+
+	nested_flush_dev_iotlb(domain, addr, mask, &fault);
+
+	/*
+	 * Invalidation queue errors (IQE) are not forwarded to the caller as
+	 * they can only be caused by a driver internal bug.
+	 */
+	if (fault & DMA_FSTS_ICE)
+		*error |= IOMMU_HWPT_INVALIDATE_VTD_S1_ICE;
+	if (fault & DMA_FSTS_ITE)
+		*error |= IOMMU_HWPT_INVALIDATE_VTD_S1_ITE;
+}
+
+/*
+ * .cache_invalidate_user callback: process the user-supplied array of
+ * struct iommu_hwpt_vtd_s1_invalidate entries in order, stopping at the
+ * first failure. On return @array->entry_num holds the number of entries
+ * fully processed; the return value is 0 or the error that stopped the loop.
+ */
+static int intel_nested_cache_invalidate_user(struct iommu_domain *domain,
+					      struct iommu_user_data_array *array)
+{
+	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+	struct iommu_hwpt_vtd_s1_invalidate inv_entry;
+	u32 processed = 0;
+	int ret = 0;
+	u32 index;
+
+	if (array->type != IOMMU_HWPT_INVALIDATE_DATA_VTD_S1) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	for (index = 0; index < array->entry_num; index++) {
+		ret = iommu_copy_struct_from_user_array(&inv_entry, array,
+							IOMMU_HWPT_INVALIDATE_DATA_VTD_S1,
+							index, inv_error);
+		if (ret)
+			break;
+
+		if (inv_entry.flags & ~IOMMU_VTD_INV_FLAGS_LEAF) {
+			ret = -EOPNOTSUPP;
+			break;
+		}
+
+		/* npages == U64_MAX is only accepted together with addr == 0. */
+		if (!IS_ALIGNED(inv_entry.addr, VTD_PAGE_SIZE) ||
+		    ((inv_entry.npages == U64_MAX) && inv_entry.addr)) {
+			ret = -EINVAL;
+			break;
+		}
+
+		inv_entry.inv_error = 0;
+		intel_nested_flush_cache(dmar_domain, inv_entry.addr,
+					 inv_entry.npages,
+					 inv_entry.flags & IOMMU_VTD_INV_FLAGS_LEAF,
+					 &inv_entry.inv_error);
+
+		ret = iommu_respond_struct_to_user_array(array, index,
+							 (void *)&inv_entry,
+							 sizeof(inv_entry));
+		if (ret)
+			break;
+
+		processed++;
+	}
+
+out:
+	array->entry_num = processed;
+	return ret;
+}
+
 static const struct iommu_domain_ops intel_nested_domain_ops = {
 	.attach_dev		= intel_nested_attach_dev,
 	.free			= intel_nested_domain_free,
+	.cache_invalidate_user	= intel_nested_cache_invalidate_user,
 };
 
 struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent,