[4/5] iommu/vt-d: Add support for static identity domain
Commit Message
Add a global static identity domain with guaranteed attach semantics.
Software determines VT-d hardware support for pass-through translation by
inspecting the capability register. If pass-through translation is not
supported, the device is instructed to use DMA domain for its default
domain. While most recent VT-d hardware implementations support pass-
through translation, this capability is only lacking in some early VT-d
implementations.
With the static identity domain in place, the domain field of per-device
iommu driver data can be either a pointer to a DMA translation domain, or
NULL, indicating that the static identity domain is attached. Refine some
code to accommodate this change.
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
drivers/iommu/intel/iommu.c | 129 ++++++++++++++++++++++++++++++++++--
drivers/iommu/intel/svm.c | 2 +-
2 files changed, 125 insertions(+), 6 deletions(-)
Comments
On Mon, Nov 20, 2023 at 07:29:43PM +0800, Lu Baolu wrote:
> @@ -2311,6 +2316,13 @@ static int device_def_domain_type(struct device *dev)
> return IOMMU_DOMAIN_IDENTITY;
> }
>
> + /*
> + * Hardware does not support the passthrough translation mode.
> + * Always use a dynamaic mapping domain.
> + */
> + if (!ecap_pass_through(iommu->ecap))
> + return IOMMU_DOMAIN_DMA;
> +
Doesn't this return from def_domain_type completely prevent using an
identity domain?
I thought the point of this was to allow the identity domain but have
it be translating?
Jason
On Wed, Nov 29, 2023 at 04:26:15PM -0400, Jason Gunthorpe wrote:
> On Mon, Nov 20, 2023 at 07:29:43PM +0800, Lu Baolu wrote:
>
> > @@ -2311,6 +2316,13 @@ static int device_def_domain_type(struct device *dev)
> > return IOMMU_DOMAIN_IDENTITY;
> > }
> >
> > + /*
> > + * Hardware does not support the passthrough translation mode.
> > + * Always use a dynamaic mapping domain.
> > + */
> > + if (!ecap_pass_through(iommu->ecap))
> > + return IOMMU_DOMAIN_DMA;
> > +
>
> Doesn't this return from def_domain_type completely prevent using an
> identity domain?
>
> I thought the point of this was to allow the identity domain but have
> it be translating?
I suppose the answer is the next patch deletes that stuff.
I would probably have structured this in the other order, first add
this hunk and say that old HW is being de-supported. Remove all the
now-dead code creating the 1:1 page table, then refactor the remainder
to create the global static.
Jason
On 2023/11/30 4:28, Jason Gunthorpe wrote:
> On Wed, Nov 29, 2023 at 04:26:15PM -0400, Jason Gunthorpe wrote:
>> On Mon, Nov 20, 2023 at 07:29:43PM +0800, Lu Baolu wrote:
>>
>>> @@ -2311,6 +2316,13 @@ static int device_def_domain_type(struct device *dev)
>>> return IOMMU_DOMAIN_IDENTITY;
>>> }
>>>
>>> + /*
>>> + * Hardware does not support the passthrough translation mode.
>>> + * Always use a dynamaic mapping domain.
>>> + */
>>> + if (!ecap_pass_through(iommu->ecap))
>>> + return IOMMU_DOMAIN_DMA;
>>> +
>> Doesn't this return from def_domain_type completely prevent using an
>> identity domain?
>>
>> I thought the point of this was to allow the identity domain but have
>> it be translating?
> I suppose the answer is the next patch deletes that stuff.
>
> I would probably have structured this in the other order, first add
> this hunk and say that old HW is being de-supported. Remove all the
> now-dead code creating the 1:1 page table, then refactor the remainder
> to create the global static.
Okay, that would be more readable.
Best regards,
baolu
@@ -1282,7 +1282,8 @@ static void iommu_enable_pci_caps(struct device_domain_info *info)
if (info->ats_supported && pci_ats_page_aligned(pdev) &&
!pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
info->ats_enabled = 1;
- domain_update_iotlb(info->domain);
+ if (info->domain)
+ domain_update_iotlb(info->domain);
}
}
@@ -1298,7 +1299,8 @@ static void iommu_disable_pci_caps(struct device_domain_info *info)
if (info->ats_enabled) {
pci_disable_ats(pdev);
info->ats_enabled = 0;
- domain_update_iotlb(info->domain);
+ if (info->domain)
+ domain_update_iotlb(info->domain);
}
if (info->pasid_enabled) {
@@ -2301,6 +2303,9 @@ static bool device_rmrr_is_relaxable(struct device *dev)
*/
static int device_def_domain_type(struct device *dev)
{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+
if (dev_is_pci(dev)) {
struct pci_dev *pdev = to_pci_dev(dev);
@@ -2311,6 +2316,13 @@ static int device_def_domain_type(struct device *dev)
return IOMMU_DOMAIN_IDENTITY;
}
+ /*
+ * Hardware does not support the passthrough translation mode.
+ * Always use a dynamaic mapping domain.
+ */
+ if (!ecap_pass_through(iommu->ecap))
+ return IOMMU_DOMAIN_DMA;
+
return 0;
}
@@ -3712,6 +3724,9 @@ static void dmar_remove_one_dev_info(struct device *dev)
domain_context_clear(info);
}
+ if (!domain)
+ return;
+
spin_lock_irqsave(&domain->lock, flags);
list_del(&info->link);
spin_unlock_irqrestore(&domain->lock, flags);
@@ -3920,11 +3935,9 @@ int prepare_domain_attach_device(struct iommu_domain *domain,
static int intel_iommu_attach_device(struct iommu_domain *domain,
struct device *dev)
{
- struct device_domain_info *info = dev_iommu_priv_get(dev);
int ret;
- if (info->domain)
- device_block_translation(dev);
+ device_block_translation(dev);
ret = prepare_domain_attach_device(domain, dev);
if (ret)
@@ -4529,6 +4542,9 @@ static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
goto out_tear_down;
}
+ if (domain->type == IOMMU_DOMAIN_IDENTITY)
+ goto out_tear_down;
+
dmar_domain = to_dmar_domain(domain);
spin_lock_irqsave(&dmar_domain->lock, flags);
list_for_each_entry(curr, &dmar_domain->dev_pasids, link_domain) {
@@ -4703,8 +4719,111 @@ const struct iommu_dirty_ops intel_dirty_ops = {
.read_and_clear_dirty = intel_iommu_read_and_clear_dirty,
};
+static int context_setup_pass_through(struct device *dev, u8 bus, u8 devfn)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+ struct context_entry *context;
+
+ spin_lock(&iommu->lock);
+ context = iommu_context_addr(iommu, bus, devfn, 1);
+ if (!context) {
+ spin_unlock(&iommu->lock);
+ return -ENOMEM;
+ }
+
+ if (context_present(context) && !context_copied(iommu, bus, devfn)) {
+ spin_unlock(&iommu->lock);
+ return 0;
+ }
+
+ copied_context_tear_down(iommu, context, bus, devfn);
+ context_clear_entry(context);
+ context_set_domain_id(context, FLPT_DEFAULT_DID);
+
+ /*
+ * In pass through mode, AW must be programmed to indicate the largest
+ * AGAW value supported by hardware. And ASR is ignored by hardware.
+ */
+ context_set_address_width(context, iommu->msagaw);
+ context_set_translation_type(context, CONTEXT_TT_PASS_THROUGH);
+ context_set_fault_enable(context);
+ context_set_present(context);
+ if (!ecap_coherent(iommu->ecap))
+ clflush_cache_range(context, sizeof(*context));
+ context_present_cache_flush(iommu, FLPT_DEFAULT_DID, bus, devfn);
+ spin_unlock(&iommu->lock);
+
+ return 0;
+}
+
+static int context_setup_pass_through_cb(struct pci_dev *pdev, u16 alias, void *data)
+{
+ struct device *dev = data;
+
+ if (dev != &pdev->dev)
+ return 0;
+
+ return context_setup_pass_through(dev, PCI_BUS_NUM(alias), alias & 0xff);
+}
+
+static int device_setup_pass_through(struct device *dev)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+
+ if (!dev_is_pci(dev))
+ return context_setup_pass_through(dev, info->bus, info->devfn);
+
+ return pci_for_each_dma_alias(to_pci_dev(dev),
+ context_setup_pass_through_cb, dev);
+}
+
+static int identity_domain_attach_dev(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+ int ret;
+
+ device_block_translation(dev);
+
+ if (dev_is_real_dma_subdevice(dev))
+ return 0;
+
+ if (sm_supported(iommu)) {
+ ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID);
+ if (!ret)
+ iommu_enable_pci_caps(info);
+ } else {
+ ret = device_setup_pass_through(dev);
+ }
+
+ return ret;
+}
+
+static int identity_domain_set_dev_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+
+ if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
+ return -EOPNOTSUPP;
+
+ return intel_pasid_setup_pass_through(iommu, dev, pasid);
+}
+
+static struct iommu_domain identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &(const struct iommu_domain_ops) {
+ .attach_dev = identity_domain_attach_dev,
+ .set_dev_pasid = identity_domain_set_dev_pasid,
+ },
+};
+
const struct iommu_ops intel_iommu_ops = {
.blocked_domain = &blocking_domain,
+ .identity_domain = &identity_domain,
.capable = intel_iommu_capable,
.hw_info = intel_iommu_hw_info,
.domain_alloc = intel_iommu_domain_alloc,
@@ -493,7 +493,7 @@ void intel_drain_pasid_prq(struct device *dev, u32 pasid)
domain = info->domain;
pdev = to_pci_dev(dev);
sid = PCI_DEVID(info->bus, info->devfn);
- did = domain_id_iommu(domain, iommu);
+ did = domain ? domain_id_iommu(domain, iommu) : FLPT_DEFAULT_DID;
qdep = pci_ats_queue_depth(pdev);
/*