[RFC,20/21] iommu/amd: Introduce vIOMMU ioctl for setting up guest CR3

Message ID 20230621235508.113949-21-suravee.suthikulpanit@amd.com
State New
Headers
Series iommu/amd: Introduce support for HW accelerated vIOMMU w/ nested page table |

Commit Message

Suravee Suthikulpanit June 21, 2023, 11:55 p.m. UTC
  This ioctl interface sets up the guest CR3 (gCR3) table, which
is defined by the guest IOMMU driver. It also enables nested
I/O page translation in the host.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
 drivers/iommu/amd/amd_iommu.h |  12 ++++
 drivers/iommu/amd/iommu.c     | 107 ++++++++++++++++++++++++++++++++++
 drivers/iommu/amd/viommu.c    |  36 ++++++++++++
 include/linux/iommu.h         |   1 +
 include/uapi/linux/iommufd.h  |  20 +++++++
 5 files changed, 176 insertions(+)
  

Patch

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index fccae07e8c9f..463cd59127b7 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -84,6 +84,18 @@  extern void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain);
 extern int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid);
 extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid,
 				     unsigned long cr3);
+extern int amd_viommu_user_gcr3_update(const void *user_data,
+				       struct iommu_domain *udom);
+extern int amd_iommu_setup_gcr3_table(struct amd_iommu *iommu,
+				      struct pci_dev *pdev,
+				      struct iommu_domain *dom,
+				      struct iommu_domain *udom,
+				      int pasids, bool giov);
+extern int amd_iommu_user_set_gcr3(struct amd_iommu *iommu,
+				   struct iommu_domain *dom,
+				   struct iommu_domain *udom,
+				   struct pci_dev *pdev, u32 pasid,
+				   unsigned long cr3);
 extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid);
 extern void amd_iommu_iotlb_sync(struct iommu_domain *domain,
 				 struct iommu_iotlb_gather *gather);
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index f22b2a5a8bfc..bff53977f8f7 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -80,6 +80,8 @@  struct kmem_cache *amd_iommu_irq_cache;
 
 static void detach_device(struct device *dev);
 static int domain_enable_v2(struct protection_domain *domain, int pasids, bool giov);
+static int __set_gcr3(struct protection_domain *domain, u32 pasid,
+		      unsigned long cr3);
 
 /****************************************************************************
  *
@@ -2525,10 +2527,43 @@  static void *amd_iommu_hw_info(struct device *dev, u32 *length)
 	return hwinfo;
 }
 
+/*
+ * Allocate a domain on behalf of a user-space (IOMMUFD) request.
+ *
+ * @dev:       device the domain is allocated for (its bus selects the ops)
+ * @hwpt_type: discriminator for @user_data
+ * @parent:    when non-NULL, the new domain is a nested (v2) domain
+ * @user_data: vendor specific data; read as struct iommu_hwpt_amd_v2 by
+ *             amd_viommu_user_gcr3_update() on the nested path
+ *
+ * Without @parent this is a plain iommu_domain_alloc(). With @parent the
+ * domain type becomes IOMMU_DOMAIN_NESTED and the guest CR3 table is set up
+ * from @user_data.
+ *
+ * Returns the new domain, or NULL on any failure.
+ */
+static struct iommu_domain *
+amd_iommu_domain_alloc_user(struct device *dev,
+			    enum iommu_hwpt_type hwpt_type,
+			    struct iommu_domain *parent,
+			    const union iommu_domain_user_data *user_data)
+{
+	struct iommu_domain *dom;
+
+	/*
+	 * The nested path dereferences @user_data as AMD v2 data, so reject
+	 * a missing payload or a payload of another vendor's type before
+	 * allocating anything.
+	 */
+	if (parent &&
+	    (hwpt_type != IOMMU_HWPT_TYPE_AMD_V2 || !user_data))
+		return NULL;
+
+	dom = iommu_domain_alloc(dev->bus);
+	if (!dom || !parent)
+		return dom;
+
+	/*
+	 * The parent is not null only when external driver calls IOMMUFD kAPI
+	 * to create IOMMUFD_OBJ_HW_PAGETABLE to attach a bound device to IOAS.
+	 * This is for nested (v2) page table.
+	 *
+	 * TODO: Currently, only support nested table w/ 1 pasid for GIOV use case.
+	 *       Add support for multiple pasids.
+	 */
+	dom->type = IOMMU_DOMAIN_NESTED;
+
+	if (amd_viommu_user_gcr3_update(user_data, dom)) {
+		iommu_domain_free(dom);
+		return NULL;
+	}
+
+	return dom;
+}
+
 const struct iommu_ops amd_iommu_ops = {
 	.capable		= amd_iommu_capable,
 	.hw_info		= amd_iommu_hw_info,
 	.domain_alloc		= amd_iommu_domain_alloc,
+	.domain_alloc_user	= amd_iommu_domain_alloc_user,
 	.probe_device		= amd_iommu_probe_device,
 	.release_device		= amd_iommu_release_device,
 	.probe_finalize		= amd_iommu_probe_finalize,
@@ -2537,6 +2572,7 @@  const struct iommu_ops amd_iommu_ops = {
 	.is_attach_deferred	= amd_iommu_is_attach_deferred,
 	.pgsize_bitmap		= AMD_IOMMU_PGSIZES,
 	.def_domain_type	= amd_iommu_def_domain_type,
+	.hw_info_type		= IOMMU_HW_INFO_TYPE_AMD,
 	.default_domain_ops	= &(const struct iommu_domain_ops) {
 		.attach_dev	= amd_iommu_attach_device,
 		.map_pages	= amd_iommu_map_pages,
@@ -2639,6 +2675,77 @@  int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids, bool giov)
 }
 EXPORT_SYMBOL(amd_iommu_domain_enable_v2);
 
+/*
+ * Allocate a GCR3 table for a user domain and install it in the device
+ * table entry of @pdev.
+ *
+ * @iommu:  IOMMU instance whose device table is updated and flushed
+ * @pdev:   PCI device whose DTE is rewritten
+ * @dom:    domain passed to set_dte_entry() together with @udom
+ * @udom:   user domain that receives the new GCR3 table, glx and flags
+ * @pasids: number of PASIDs the table has to cover
+ * @giov:   also set PD_GIOV_MASK on @udom when true
+ *
+ * Returns 0 on success, -ENODEV if @pdev has no IOMMU private data,
+ * -EINVAL if @udom already has a GCR3 table or @pasids needs more table
+ * levels than amd_iommu_max_glx_val, and -ENOMEM if the table page cannot
+ * be allocated.
+ */
+int amd_iommu_setup_gcr3_table(struct amd_iommu *iommu, struct pci_dev *pdev,
+			       struct iommu_domain *dom,
+			       struct iommu_domain *udom,
+			       int pasids, bool giov)
+{
+	int levels;
+	struct protection_domain *pdom = to_pdomain(dom);
+	struct protection_domain *updom = to_pdomain(udom);
+	struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
+
+	/* Checked up front so that no table page is allocated and then lost. */
+	if (!dev_data)
+		return -ENODEV;
+
+	if (updom->gcr3_tbl)
+		return -EINVAL;
+
+	/* Number of GCR3 table levels required (each level resolves 9 bits) */
+	for (levels = 0; (pasids - 1) & ~0x1ff; pasids >>= 9)
+		levels += 1;
+
+	if (levels > amd_iommu_max_glx_val)
+		return -EINVAL;
+
+	updom->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC);
+	if (updom->gcr3_tbl == NULL)
+		return -ENOMEM;
+
+	updom->glx = levels;
+	updom->flags |= PD_IOMMUV2_MASK;
+	if (giov)
+		updom->flags |= PD_GIOV_MASK;
+
+	set_dte_entry(iommu, dev_data->devid, pdom, updom,
+		      updom->gcr3_tbl,
+		      dev_data->ats.enabled, false);
+	clone_aliases(iommu, dev_data->dev);
+
+	/* Make the hardware pick up the rewritten DTE before returning. */
+	iommu_flush_dte(iommu, dev_data->devid);
+	iommu_completion_wait(iommu);
+	return 0;
+}
+
+/*
+ * Note: For vIOMMU, the guest could be using a different
+ *       GCR3 table for each VFIO pass-through device.
+ *       Therefore, we need a per-device GCR3 table.
+ *
+ * Writes @cr3 for @pasid into the GCR3 table of @udom via __set_gcr3()
+ * while holding the locks of both @dom and @udom. On success the DTE of
+ * @pdev is flushed and the flush is waited for.
+ *
+ * Returns 0 on success, -ENODEV if @pdev has no IOMMU private data, or the
+ * error returned by __set_gcr3().
+ */
+int amd_iommu_user_set_gcr3(struct amd_iommu *iommu,
+			    struct iommu_domain *dom,
+			    struct iommu_domain *udom,
+			    struct pci_dev *pdev, u32 pasid,
+			    unsigned long cr3)
+{
+	struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
+	struct protection_domain *domain = to_pdomain(dom);
+	struct protection_domain *udomain = to_pdomain(udom);
+	unsigned long flags;
+	int ret;
+
+	if (!dev_data)
+		return -ENODEV;
+
+	/*
+	 * Only the outer lock saves and restores the interrupt state.
+	 * Saving it a second time into the same variable would overwrite the
+	 * original state with "disabled" and leave interrupts off on return.
+	 * Interrupts are already off when the inner lock is taken, so a plain
+	 * spin_lock() is sufficient there.
+	 *
+	 * NOTE(review): both locks look like the same lock class, so lockdep
+	 * may want a nesting annotation here - confirm.
+	 */
+	spin_lock_irqsave(&domain->lock, flags);
+	spin_lock(&udomain->lock);
+
+	ret = __set_gcr3(udomain, pasid, cr3);
+	if (!ret) {
+		device_flush_dte(dev_data);
+		iommu_completion_wait(iommu);
+	}
+
+	spin_unlock(&udomain->lock);
+	spin_unlock_irqrestore(&domain->lock, flags);
+
+	return ret;
+}
+
 static int __flush_pasid(struct protection_domain *domain, u32 pasid,
 			 u64 address, bool size)
 {
diff --git a/drivers/iommu/amd/viommu.c b/drivers/iommu/amd/viommu.c
index 1bd4282384c4..8ce3ee3d6bf5 100644
--- a/drivers/iommu/amd/viommu.c
+++ b/drivers/iommu/amd/viommu.c
@@ -1072,3 +1072,39 @@  int amd_viommu_cmdbuf_update(struct amd_viommu_cmdbuf_data *data)
 	return -EINVAL;
 }
 EXPORT_SYMBOL(amd_viommu_cmdbuf_update);
+
+/*
+ * Set up the guest CR3 table of a nested user domain.
+ *
+ * @user_data: points to a struct iommu_hwpt_amd_v2
+ * @udom:      user domain that receives the GCR3 table
+ *
+ * Pins the page at hwpt->gcr3_va, looks up the host PCI device for the
+ * guest ID / guest device ID pair, allocates a single-PASID GCR3 table for
+ * @udom and programs PASID 0 with hwpt->gcr3.
+ *
+ * Returns 0 on success or a negative errno. On failure the pinned page and
+ * the PCI device reference taken here are released again.
+ *
+ * NOTE(review): on success the page stays pinned; nothing in this function
+ * records it for a later release - confirm that domain teardown handles it.
+ */
+int amd_viommu_user_gcr3_update(const void *user_data, struct iommu_domain *udom)
+{
+	const struct iommu_hwpt_amd_v2 *hwpt = user_data;
+	struct iommu_domain *dom;
+	struct amd_iommu *iommu;
+	struct pci_dev *pdev;
+	struct page *page;
+	u16 hdev_id;
+	int ret;
+
+	if (!hwpt)
+		return -EINVAL;
+
+	/* iommu_id comes from user space, so the lookup may fail. */
+	iommu = get_amd_iommu_from_devid(hwpt->iommu_id);
+	if (!iommu)
+		return -ENODEV;
+
+	hdev_id = viommu_get_hdev_id(iommu, hwpt->gid, hwpt->gdev_id);
+
+	pr_debug("%s: gid=%u, hdev_id=%#x, gcr3_va=%#llx\n",
+		 __func__, hwpt->gid, hdev_id, (unsigned long long) hwpt->gcr3_va);
+
+	/*
+	 * get_user_pages_fast() returns the number of pinned pages or a
+	 * negative errno, so the result must be kept in a signed variable.
+	 */
+	ret = get_user_pages_fast(hwpt->gcr3_va, 1, FOLL_WRITE, &page);
+	if (ret != 1) {
+		pr_err("Failure locking gcr3 page (%#llx).\n",
+		       (unsigned long long) hwpt->gcr3_va);
+		return ret < 0 ? ret : -EINVAL;
+	}
+
+	/* Takes a reference on the device; dropped before returning. */
+	pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(hdev_id),
+					   hdev_id & 0xff);
+	if (!pdev) {
+		ret = -ENODEV;
+		goto err_unpin;
+	}
+
+	dom = iommu_get_domain_for_dev(&pdev->dev);
+	if (!dom) {
+		ret = -EINVAL;
+		goto err_put_dev;
+	}
+
+	/* Allocate gcr3 table */
+	/* TODO: Only support 1 pasid (zero) for now */
+	ret = amd_iommu_setup_gcr3_table(iommu, pdev, dom, udom, 1,
+					 iommu_feature(iommu, FEATURE_GIOSUP));
+	if (ret) {
+		pr_err("%s: Fail to enable gcr3 (devid=%#x)\n", __func__, pci_dev_id(pdev));
+		goto err_put_dev;
+	}
+
+	ret = amd_iommu_user_set_gcr3(iommu, dom, udom, pdev, 0, hwpt->gcr3);
+	if (ret)
+		goto err_put_dev;
+
+	pci_dev_put(pdev);
+	return 0;
+
+err_put_dev:
+	pci_dev_put(pdev);
+err_unpin:
+	put_page(page);
+	return ret;
+}
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 4116f12d5f97..9239cd01d77c 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -236,6 +236,7 @@  union iommu_domain_user_data {
 #endif
 	struct iommu_hwpt_vtd_s1 vtd;
 	struct iommu_hwpt_arm_smmuv3 smmuv3;
+	struct iommu_hwpt_amd_v2 amdv2;
 };
 
 /**
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index f8ea9faf6770..4147171429e1 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -408,6 +408,23 @@  struct iommu_hwpt_arm_smmuv3 {
 	__aligned_u64 out_event_uptr;
 };
 
+/**
+ * struct iommu_hwpt_amd_v2 - AMD IOMMU specific user-managed
+ *                            v2 I/O page table data
+ * @gcr3: GCR3 guest physical address
+ * @gcr3_va: GCR3 host virtual address
+ * @gid: Guest ID
+ * @iommu_id: IOMMU host device ID
+ * @gdev_id: Guest device ID
+ * @__reserved: Explicit tail padding; keeps the 32-byte layout identical on
+ *              every ABI. Should be set to 0 by user space.
+ */
+struct iommu_hwpt_amd_v2 {
+	__u64 gcr3;
+	__u64 gcr3_va;
+	__u32 gid;
+	__u32 iommu_id;
+	__u16 gdev_id;
+	__u16 __reserved[3];
+};
+
 /**
  * enum iommu_hwpt_type - IOMMU HWPT Type
  * @IOMMU_HWPT_TYPE_DEFAULT: default
@@ -418,6 +435,7 @@  enum iommu_hwpt_type {
 	IOMMU_HWPT_TYPE_DEFAULT,
 	IOMMU_HWPT_TYPE_VTD_S1,
 	IOMMU_HWPT_TYPE_ARM_SMMUV3,
+	IOMMU_HWPT_TYPE_AMD_V2,
 };
 
 /**
@@ -523,11 +541,13 @@  struct iommu_hw_info_amd {
  * enum iommu_hw_info_type - IOMMU Hardware Info Types
  * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type
  * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type
+ * @IOMMU_HW_INFO_TYPE_AMD: AMD IOMMU info type
  */
 enum iommu_hw_info_type {
 	IOMMU_HW_INFO_TYPE_NONE,
 	IOMMU_HW_INFO_TYPE_INTEL_VTD,
 	IOMMU_HW_INFO_TYPE_ARM_SMMUV3,
+	IOMMU_HW_INFO_TYPE_AMD,
 };
 
 /**