[v2,5/5] iommu/vt-d: Add nested domain support

Message ID 20230309082207.612346-6-yi.l.liu@intel.com
State New
Series Add Intel VT-d nested translation

Commit Message

Yi Liu March 9, 2023, 8:22 a.m. UTC
  From: Lu Baolu <baolu.lu@linux.intel.com>

This adds nested domain support to the Intel IOMMU driver. It allows
allocating and freeing a nested domain, attaching a nested domain to a
device, and synchronizing the caches when the userspace-managed page
tables are updated.

Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
---
 drivers/iommu/intel/Makefile |   2 +-
 drivers/iommu/intel/iommu.c  |  38 ++++++----
 drivers/iommu/intel/iommu.h  |  15 ++++
 drivers/iommu/intel/nested.c | 143 +++++++++++++++++++++++++++++++++++
 4 files changed, 182 insertions(+), 16 deletions(-)
 create mode 100644 drivers/iommu/intel/nested.c
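
For orientation, below is a rough sketch of how the new user-managed
allocation path is expected to be driven. The domain_alloc_user op and
the struct iommu_hwpt_intel_vtd layout come from this series; the
calling code is illustrative only, since the iommufd plumbing lands in
separate patches:

	/*
	 * Illustrative caller, not part of this patch. s2_domain is a
	 * previously allocated stage-2 (parent) domain and s1_pgtbl is
	 * the address of the guest stage-1 page table supplied by
	 * userspace.
	 */
	const struct iommu_ops *ops = dev_iommu_ops(dev);
	struct iommu_hwpt_intel_vtd vtd_data = {
		.pgtbl_addr = s1_pgtbl,
	};
	struct iommu_domain *nested;

	nested = ops->domain_alloc_user(dev, s2_domain, &vtd_data);
	if (!nested)
		return -ENOMEM;

	/* Attaching then flows through intel_nested_attach_dev(). */
	ret = iommu_attach_device(nested, dev);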
  

Comments

Liu, Jingqi March 13, 2023, 1:51 p.m. UTC | #1
On 3/9/2023 4:22 PM, Yi Liu wrote:
> From: Lu Baolu <baolu.lu@linux.intel.com>
>
> This adds nested domain support to the Intel IOMMU driver. It allows
> allocating and freeing a nested domain, attaching a nested domain to a
> device, and synchronizing the caches when the userspace-managed page
> tables are updated.
>
> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> ---
>   drivers/iommu/intel/Makefile |   2 +-
>   drivers/iommu/intel/iommu.c  |  38 ++++++----
>   drivers/iommu/intel/iommu.h  |  15 ++++
>   drivers/iommu/intel/nested.c | 143 +++++++++++++++++++++++++++++++++++
>   4 files changed, 182 insertions(+), 16 deletions(-)
>   create mode 100644 drivers/iommu/intel/nested.c
[...]

> +
> +struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *s2_domain,
> +					       const void *user_data)
> +{
> +	const struct iommu_hwpt_intel_vtd *vtd = user_data;
> +	struct dmar_domain *domain;
> +

Would it be better to add the following check?

	if (WARN_ON(!user_data))
		return NULL;

> +	domain = kzalloc(sizeof(*domain), GFP_KERNEL_ACCOUNT);
> +	if (!domain)
> +		return NULL;
> +
> +	domain->use_first_level = true;
> +	domain->s2_domain = to_dmar_domain(s2_domain);
> +	domain->s1_pgtbl = vtd->pgtbl_addr;
> +	domain->s1_cfg = *vtd;
> +	domain->domain.ops = &intel_nested_domain_ops;
> +	domain->domain.type = IOMMU_DOMAIN_NESTED;
> +	INIT_LIST_HEAD(&domain->devices);
> +	spin_lock_init(&domain->lock);
> +	xa_init(&domain->iommu_array);
> +
> +	return &domain->domain;
> +}

Thanks,

Jingqi
  
Baolu Lu March 14, 2023, 1:21 a.m. UTC | #2
On 3/13/23 5:11 PM, Liu, Jingqi wrote:
>> +struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *s2_domain,
>> +					       const void *user_data)
>> +{
>> +	const struct iommu_hwpt_intel_vtd *vtd = user_data;
>> +	struct dmar_domain *domain;
>> +
> 
> Would it be better to add the following check?
> 
> 	if (WARN_ON(!user_data))
> 		return NULL;

The iommufd core has already done this sanity check. Considering that
this callback is only used by iommufd, the individual driver has no
need to check it again.

Best regards,
baolu
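
In other words, the expected split is that the core validates user_data
before calling into the driver, roughly along these lines (a
hypothetical sketch of the core-side check; the actual iommufd code is
outside this series):

	/* Hypothetical core-side validation, illustrative names only. */
	if (!user_data)
		return ERR_PTR(-EINVAL);

	domain = ops->domain_alloc_user(dev, parent, user_data);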
  

Patch

diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile
index 7af3b8a4f2a0..5dabf081a779 100644
--- a/drivers/iommu/intel/Makefile
+++ b/drivers/iommu/intel/Makefile
@@ -1,6 +1,6 @@ 
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_DMAR_TABLE) += dmar.o
-obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o
+obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o nested.o
 obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o
 obj-$(CONFIG_DMAR_PERF) += perf.o
 obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 004406209793..89cc7095afed 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -277,7 +277,6 @@  static LIST_HEAD(dmar_satc_units);
 #define for_each_rmrr_units(rmrr) \
 	list_for_each_entry(rmrr, &dmar_rmrr_units, list)
 
-static void device_block_translation(struct device *dev);
 static void intel_iommu_domain_free(struct iommu_domain *domain);
 
 int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
@@ -555,7 +554,7 @@  static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
 }
 
 /* Some capabilities may be different across iommus */
-static void domain_update_iommu_cap(struct dmar_domain *domain)
+void domain_update_iommu_cap(struct dmar_domain *domain)
 {
 	domain_update_iommu_coherency(domain);
 	domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
@@ -1498,10 +1497,10 @@  static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
 	spin_unlock_irqrestore(&domain->lock, flags);
 }
 
-static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
-				  struct dmar_domain *domain,
-				  unsigned long pfn, unsigned int pages,
-				  int ih, int map)
+void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
+			   struct dmar_domain *domain,
+			   unsigned long pfn, unsigned int pages,
+			   int ih, int map)
 {
 	unsigned int aligned_pages = __roundup_pow_of_two(pages);
 	unsigned int mask = ilog2(aligned_pages);
@@ -1573,7 +1572,7 @@  static inline void __mapping_notify_one(struct intel_iommu *iommu,
 		iommu_flush_write_buffer(iommu);
 }
 
-static void intel_flush_iotlb_all(struct iommu_domain *domain)
+void intel_flush_iotlb_all(struct iommu_domain *domain)
 {
 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
 	struct iommu_domain_info *info;
@@ -1765,8 +1764,7 @@  static struct dmar_domain *alloc_domain(unsigned int type)
 	return domain;
 }
 
-static int domain_attach_iommu(struct dmar_domain *domain,
-			       struct intel_iommu *iommu)
+int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
 {
 	struct iommu_domain_info *info, *curr;
 	unsigned long ndomains;
@@ -1815,8 +1813,7 @@  static int domain_attach_iommu(struct dmar_domain *domain,
 	return ret;
 }
 
-static void domain_detach_iommu(struct dmar_domain *domain,
-				struct intel_iommu *iommu)
+void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
 {
 	struct iommu_domain_info *info;
 
@@ -4103,7 +4100,7 @@  static void dmar_remove_one_dev_info(struct device *dev)
  * all DMA requests without PASID from the device are blocked. If the page
  * table has been set, clean up the data structures.
  */
-static void device_block_translation(struct device *dev)
+void device_block_translation(struct device *dev)
 {
 	struct device_domain_info *info = dev_iommu_priv_get(dev);
 	struct intel_iommu *iommu = info->iommu;
@@ -4204,14 +4201,24 @@  static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
 	return NULL;
 }
 
+static struct iommu_domain *
+intel_iommu_domain_alloc_user(struct device *dev, struct iommu_domain *parent,
+			      const void *user_data)
+{
+	if (parent)
+		return intel_nested_domain_alloc(parent, user_data);
+	else
+		return iommu_domain_alloc(dev->bus);
+}
+
 static void intel_iommu_domain_free(struct iommu_domain *domain)
 {
 	if (domain != &si_domain->domain && domain != &blocking_domain)
 		domain_exit(to_dmar_domain(domain));
 }
 
-static int prepare_domain_attach_device(struct iommu_domain *domain,
-					struct device *dev)
+int prepare_domain_attach_device(struct iommu_domain *domain,
+				 struct device *dev)
 {
 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
 	struct intel_iommu *iommu;
@@ -4450,7 +4457,7 @@  static void domain_set_force_snooping(struct dmar_domain *domain)
 						     PASID_RID2PASID);
 }
 
-static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain)
+bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain)
 {
 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
 	unsigned long flags;
@@ -4801,6 +4808,7 @@  const struct iommu_ops intel_iommu_ops = {
 	.capable		= intel_iommu_capable,
 	.hw_info		= intel_iommu_hw_info,
 	.domain_alloc		= intel_iommu_domain_alloc,
+	.domain_alloc_user	= intel_iommu_domain_alloc_user,
 	.probe_device		= intel_iommu_probe_device,
 	.probe_finalize		= intel_iommu_probe_finalize,
 	.release_device		= intel_iommu_release_device,
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 9446e17dd202..661fb5050e2d 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -848,6 +848,19 @@  void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu,
 
 int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
 		   unsigned int count, unsigned long options);
+int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu);
+void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu);
+void device_block_translation(struct device *dev);
+int prepare_domain_attach_device(struct iommu_domain *domain,
+				 struct device *dev);
+bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain);
+void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
+			   struct dmar_domain *domain,
+			   unsigned long pfn, unsigned int pages,
+			   int ih, int map);
+void intel_flush_iotlb_all(struct iommu_domain *domain);
+void domain_update_iommu_cap(struct dmar_domain *domain);
+
 /*
  * Options used in qi_submit_sync:
  * QI_OPT_WAIT_DRAIN - Wait for PRQ drain completion, spec 6.5.2.8.
@@ -860,6 +873,8 @@  void *alloc_pgtable_page(int node, gfp_t gfp);
 void free_pgtable_page(void *vaddr);
 void iommu_flush_write_buffer(struct intel_iommu *iommu);
 struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn);
+struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *s2_domain,
+					       const void *data);
 
 #ifdef CONFIG_INTEL_IOMMU_SVM
 extern void intel_svm_check(struct intel_iommu *iommu);
diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
new file mode 100644
index 000000000000..64441d11a84d
--- /dev/null
+++ b/drivers/iommu/intel/nested.c
@@ -0,0 +1,143 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * nested.c - nested mode translation support
+ *
+ * Copyright (C) 2023 Intel Corporation
+ *
+ * Author: Lu Baolu <baolu.lu@linux.intel.com>
+ *         Jacob Pan <jacob.jun.pan@linux.intel.com>
+ */
+
+#define pr_fmt(fmt)	"DMAR: " fmt
+
+#include <linux/iommu.h>
+#include <linux/pci.h>
+#include <linux/pci-ats.h>
+
+#include "iommu.h"
+#include "pasid.h"
+
+static int intel_nested_attach_dev(struct iommu_domain *domain,
+				   struct device *dev)
+{
+	struct device_domain_info *info = dev_iommu_priv_get(dev);
+	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+	struct intel_iommu *iommu = info->iommu;
+	unsigned long flags;
+	int ret = 0;
+
+	if (info->domain)
+		device_block_translation(dev);
+
+	/* Is s2_domain compatible with this IOMMU? */
+	ret = prepare_domain_attach_device(&dmar_domain->s2_domain->domain, dev);
+	if (ret) {
+		dev_err_ratelimited(dev, "s2 domain is not compatible\n");
+		return ret;
+	}
+
+	ret = domain_attach_iommu(dmar_domain, iommu);
+	if (ret) {
+		dev_err_ratelimited(dev, "Failed to attach domain to iommu\n");
+		return ret;
+	}
+
+	ret = intel_pasid_setup_nested(iommu, dev,
+				       PASID_RID2PASID, dmar_domain);
+	if (ret) {
+		domain_detach_iommu(dmar_domain, iommu);
+		dev_err_ratelimited(dev, "Failed to setup pasid entry\n");
+		return ret;
+	}
+
+	info->domain = dmar_domain;
+	spin_lock_irqsave(&dmar_domain->lock, flags);
+	list_add(&info->link, &dmar_domain->devices);
+	spin_unlock_irqrestore(&dmar_domain->lock, flags);
+	domain_update_iommu_cap(dmar_domain);
+
+	return 0;
+}
+
+static void intel_nested_domain_free(struct iommu_domain *domain)
+{
+	kfree(to_dmar_domain(domain));
+}
+
+static void intel_nested_invalidate(struct device *dev,
+				    struct dmar_domain *domain,
+				    void *user_data)
+{
+	struct iommu_hwpt_invalidate_intel_vtd *inv_info = user_data;
+	struct device_domain_info *info = dev_iommu_priv_get(dev);
+	struct intel_iommu *iommu = info->iommu;
+
+	if (WARN_ON(!user_data))
+		return;
+
+	switch (inv_info->granularity) {
+	case IOMMU_VTD_QI_GRAN_ADDR:
+		if (inv_info->granule_size != VTD_PAGE_SIZE ||
+		    !IS_ALIGNED(inv_info->addr, VTD_PAGE_SIZE)) {
+			dev_err_ratelimited(dev, "Invalid invalidation address 0x%llx\n",
+					    inv_info->addr);
+			return;
+		}
+
+		iommu_flush_iotlb_psi(iommu, domain,
+				      inv_info->addr >> VTD_PAGE_SHIFT,
+				      inv_info->nb_granules, 1, 0);
+		break;
+	case IOMMU_VTD_QI_GRAN_DOMAIN:
+		intel_flush_iotlb_all(&domain->domain);
+		break;
+	default:
+		dev_err_ratelimited(dev, "Unsupported IOMMU invalidation type %d\n",
+				    inv_info->granularity);
+		break;
+	}
+}
+
+static void intel_nested_cache_invalidate_user(struct iommu_domain *domain,
+					       void *user_data)
+{
+	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+	struct device_domain_info *info;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dmar_domain->lock, flags);
+	list_for_each_entry(info, &dmar_domain->devices, link)
+		intel_nested_invalidate(info->dev, dmar_domain,
+					user_data);
+	spin_unlock_irqrestore(&dmar_domain->lock, flags);
+}
+
+static const struct iommu_domain_ops intel_nested_domain_ops = {
+	.attach_dev		= intel_nested_attach_dev,
+	.cache_invalidate_user	= intel_nested_cache_invalidate_user,
+	.free			= intel_nested_domain_free,
+	.enforce_cache_coherency = intel_iommu_enforce_cache_coherency,
+};
+
+struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *s2_domain,
+					       const void *user_data)
+{
+	const struct iommu_hwpt_intel_vtd *vtd = user_data;
+	struct dmar_domain *domain;
+
+	domain = kzalloc(sizeof(*domain), GFP_KERNEL_ACCOUNT);
+	if (!domain)
+		return NULL;
+
+	domain->use_first_level = true;
+	domain->s2_domain = to_dmar_domain(s2_domain);
+	domain->s1_pgtbl = vtd->pgtbl_addr;
+	domain->s1_cfg = *vtd;
+	domain->domain.ops = &intel_nested_domain_ops;
+	domain->domain.type = IOMMU_DOMAIN_NESTED;
+	INIT_LIST_HEAD(&domain->devices);
+	spin_lock_init(&domain->lock);
+	xa_init(&domain->iommu_array);
+
+	return &domain->domain;
+}
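
For reference, a sketch of how the new cache_invalidate_user callback
might be driven after the guest updates its stage-1 page tables. The
field names and granularity constants come from this patch; the calling
convention is an assumption, since the iommufd plumbing is in separate
patches:

	/* Illustrative only: flush one range of VTD_PAGE_SIZE granules
	 * from the nested domain. The op rejects other granule sizes
	 * and unaligned addresses. */
	struct iommu_hwpt_invalidate_intel_vtd inv = {
		.granularity	= IOMMU_VTD_QI_GRAN_ADDR,
		.addr		= iova,		/* VTD_PAGE_SIZE aligned */
		.granule_size	= VTD_PAGE_SIZE,
		.nb_granules	= npages,
	};

	nested->ops->cache_invalidate_user(nested, &inv);

	/* Or flush the whole domain: */
	inv.granularity = IOMMU_VTD_QI_GRAN_DOMAIN;
	nested->ops->cache_invalidate_user(nested, &inv);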