@@ -319,4 +319,5 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
bool fault_ignore);
void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
struct device *dev, u32 pasid);
+int intel_pasid_setup_sm_context(struct device *dev, bool deferred);
#endif /* __INTEL_PASID_H */
@@ -4270,15 +4270,26 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
ret = intel_pasid_alloc_table(dev);
if (ret) {
dev_err(dev, "PASID table allocation failed\n");
- dev_iommu_priv_set(dev, NULL);
- kfree(info);
- return ERR_PTR(ret);
+ goto err_clear_priv;
+ }
+
+ ret = intel_pasid_setup_sm_context(dev, false);
+ if (ret) {
+ dev_err(dev, "Scalable context entry setup failed\n");
+ goto err_free_table;
}
}
intel_iommu_debugfs_create_dev(info);
return &iommu->iommu;
+err_free_table:
+ intel_pasid_free_table(dev);
+err_clear_priv:
+ dev_iommu_priv_set(dev, NULL);
+ kfree(info);
+
+ return ERR_PTR(ret);
}
static void intel_iommu_release_device(struct device *dev)
@@ -304,6 +304,11 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
return -EINVAL;
}
+ if (intel_pasid_setup_sm_context(dev, true)) {
+ dev_err(dev, "Context entry is not configured\n");
+ return -ENODEV;
+ }
+
spin_lock(&iommu->lock);
pte = intel_pasid_get_entry(dev, pasid);
if (!pte) {
@@ -384,6 +389,11 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
return -EINVAL;
}
+ if (intel_pasid_setup_sm_context(dev, true)) {
+ dev_err(dev, "Context entry is not configured\n");
+ return -ENODEV;
+ }
+
pgd = domain->pgd;
agaw = iommu_skip_agaw(domain, iommu, &pgd);
if (agaw < 0) {
@@ -505,6 +515,11 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
u16 did = FLPT_DEFAULT_DID;
struct pasid_entry *pte;
+ if (intel_pasid_setup_sm_context(dev, true)) {
+ dev_err(dev, "Context entry is not configured\n");
+ return -ENODEV;
+ }
+
spin_lock(&iommu->lock);
pte = intel_pasid_get_entry(dev, pasid);
if (!pte) {
@@ -623,6 +638,11 @@ int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
return -EINVAL;
}
+ if (intel_pasid_setup_sm_context(dev, true)) {
+ dev_err_ratelimited(dev, "Context entry is not configured\n");
+ return -ENODEV;
+ }
+
spin_lock(&iommu->lock);
pte = intel_pasid_get_entry(dev, pasid);
if (!pte) {
@@ -666,3 +686,163 @@ int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
return 0;
}
+
+/*
+ * Interface to install a PASID table in a scalable-mode context table entry:
+ */
+
+/*
+ * Get the PASID directory size for a scalable-mode context entry.
+ * A value of X in the PDTS field of a scalable-mode context entry
+ * indicates a PASID directory with 2^(X + 7) entries.
+ */
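+/*
+ * For example, assuming PASID_PDE_SHIFT is 6 (64 pasid-table entries per
+ * directory entry): with max_pasid = 1 << 20, max_pde is 1 << 14, so pds
+ * is 14 - 7 = 7 and PDTS encodes a directory of 2^(7 + 7) entries.
+ */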
+static unsigned long context_get_sm_pds(struct pasid_table *table)
+{
+ unsigned long pds, max_pde;
+
+ max_pde = table->max_pasid >> PASID_PDE_SHIFT;
+ pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS);
+ if (pds < 7)
+ return 0;
+
+ return pds - 7;
+}
+
+static int context_entry_set_pasid_table(struct context_entry *context,
+ struct device *dev)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct pasid_table *table = info->pasid_table;
+ struct intel_iommu *iommu = info->iommu;
+ unsigned long pds;
+
+ context_clear_entry(context);
+
+ pds = context_get_sm_pds(table);
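+ /* Low qword: PASID directory base address plus its size (PDTS). */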
+ context->lo = (u64)virt_to_phys(table->table) | context_pdts(pds);
+ context_set_sm_rid2pasid(context, IOMMU_NO_PASID);
+
+ if (info->ats_supported)
+ context_set_sm_dte(context);
+ if (info->pri_supported)
+ context_set_sm_pre(context);
+ if (info->pasid_supported)
+ context_set_pasid(context);
+
+ context_set_fault_enable(context);
+ context_set_present(context);
+ if (!ecap_coherent(iommu->ecap))
+ clflush_cache_range(context, sizeof(*context));
+
+ return 0;
+}
+
+static int device_pasid_table_setup(struct device *dev, u8 bus, u8 devfn)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+ struct context_entry *context;
+ int ret = 0;
+
+ spin_lock(&iommu->lock);
+ context = iommu_context_addr(iommu, bus, devfn, true);
+ if (!context) {
+ ret = -ENOMEM;
+ goto out_unlock;
+ }
+
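+ /* Already set up by this kernel (not copied); nothing to do. */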
+ if (context_present(context) && !context_copied(iommu, bus, devfn))
+ goto out_unlock;
+
+ /*
+ * Cache invalidation for changes to a scalable-mode context table
+ * entry.
+ *
+ * Section 6.5.3.3 of the VT-d spec:
+ * - Device-selective context-cache invalidation;
+ * - Domain-selective PASID-cache invalidation to affected domains
+ * (can be skipped if all PASID entries were not-present);
+ * - Domain-selective IOTLB invalidation to affected domains;
+ * - Global Device-TLB invalidation to affected functions.
+ *
+ * In the kdump case, old valid entries may be cached because of
+ * in-flight DMA against the copied page tables, and nothing ever
+ * unmaps them, so explicit cache flushes are needed for all domain
+ * IDs and PASIDs used in the copied PASID table. Since we have no
+ * idea which domain IDs and PASIDs those are, upgrade to global
+ * PASID and IOTLB cache invalidation.
+ *
+ * Also in the kdump case, the device is expected to have completed
+ * its reset during driver probe by this point, so no in-flight DMA
+ * remains and nothing further needs to be done here.
+ */
+ if (context_copied(iommu, bus, devfn)) {
+ context_clear_entry(context);
+ clear_context_copied(iommu, bus, devfn);
+ iommu->flush.flush_context(iommu, 0,
+ (((u16)bus) << 8) | devfn,
+ DMA_CCMD_MASK_NOBIT,
+ DMA_CCMD_DEVICE_INVL);
+ qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
+ iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
+ devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID);
+ }
+
+ context_entry_set_pasid_table(context, dev);
+
+ /*
+ * It's a non-present to present mapping. If hardware doesn't cache
+ * non-present entries we only need to flush the write-buffer. If it
+ * _does_ cache non-present entries, then it does so in the special
+ * domain ID #0, which we have to flush:
+ */
+ if (cap_caching_mode(iommu->cap)) {
+ iommu->flush.flush_context(iommu, 0,
+ (((u16)bus) << 8) | devfn,
+ DMA_CCMD_MASK_NOBIT,
+ DMA_CCMD_DEVICE_INVL);
+ iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
+ } else {
+ iommu_flush_write_buffer(iommu);
+ }
+
+out_unlock:
+ spin_unlock(&iommu->lock);
+ return ret;
+}
+
+static int pci_pasid_table_setup(struct pci_dev *pdev, u16 alias, void *data)
+{
+ struct device *dev = data;
+
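+ /* Only set up @dev itself and its alias RIDs; skip upstream bridges. */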
+ if (dev != &pdev->dev)
+ return 0;
+
+ return device_pasid_table_setup(dev, PCI_BUS_NUM(alias), alias & 0xff);
+}
+
+/*
+ * Install the device's PASID table in its context table entry.
+ *
+ * The PASID table is installed in the context entries of both the device
+ * itself and its alias requester IDs for DMA. Set @deferred to true when
+ * calling from the domain attach paths, and to false otherwise.
+ */
+int intel_pasid_setup_sm_context(struct device *dev, bool deferred)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+
+ /*
+ * Skip the PASID table setup unless the calling context matches the
+ * state of the context entry: in the probe path (@deferred is false)
+ * the table is only installed if the entry was not copied from the
+ * previous kernel; in the deferred attach paths (@deferred is true)
+ * it is only installed if the entry was copied.
+ */
+ if (deferred ^ context_copied(iommu, info->bus, info->devfn))
+ return 0;
+
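+ /* Non-PCI devices have no DMA aliases; use the (bus, devfn) from probe. */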
+ if (!dev_is_pci(dev))
+ return device_pasid_table_setup(dev, info->bus, info->devfn);
+
+ return pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_setup, dev);
+}