To support nested translation, the parent domain is allocated with the
flag IOMMU_HWPT_ALLOC_NEST_PARENT and stores information about the v1
page table for stage 2 (i.e. GPA->SPA), whereas the child domain stores
information about the GCR3 root pointer table for stage 1 (i.e. GVA->GPA).

Modify the current driver to handle domain allocation with type
IOMMU_DOMAIN_NESTED. Also, when allocating the child domain (with the
parent domain specified), keep track of the parent using struct
protection_domain.parent.

Note that the current implementation requires the AMD IOMMU GCR3TRPMode
feature, which programs DTE[GCR3 Table Root Pointer] with the GPA
provided by the guest via struct iommu_hwpt_amd_v2, which is passed as
a parameter of struct iommu_ops.domain_alloc_user().
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
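For context, below is a rough userspace-side sketch of how this allocation
flow is expected to be exercised through iommufd: a stage-2 parent hwpt is
allocated with IOMMU_HWPT_ALLOC_NEST_PARENT, then a stage-1 child is
allocated against it carrying the guest GCR3 information. Illustrative
only: IOMMU_HWPT_DATA_AMD_V2 and the struct iommu_hwpt_amd_v2 field names
(gcr3, gdom_id) come from the uAPI patch of this series, the rest is
standard iommufd uAPI, and alloc_amd_nested_hwpt() is a made-up helper
name for the example.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/iommufd.h>

/*
 * 'fd' is an open iommufd, 'dev_id' a device bound to it, and 'ioas_id'
 * an IOAS that already holds the GPA->SPA mapping.
 */
static int alloc_amd_nested_hwpt(int fd, __u32 dev_id, __u32 ioas_id,
				 __u64 guest_gcr3_gpa, __u16 guest_domid)
{
	struct iommu_hwpt_alloc cmd = {
		.size = sizeof(cmd),
		.flags = IOMMU_HWPT_ALLOC_NEST_PARENT,	/* stage 2: GPA->SPA */
		.dev_id = dev_id,
		.pt_id = ioas_id,
	};
	/* Field names assumed from what this patch dereferences; the
	 * glx/giov/guest_paging_mode flags would be filled in as well. */
	struct iommu_hwpt_amd_v2 data = {
		.gcr3 = guest_gcr3_gpa,		/* a GPA, relies on GCR3TRPMode */
		.gdom_id = guest_domid,
	};
	__u32 parent_id;

	if (ioctl(fd, IOMMU_HWPT_ALLOC, &cmd))
		return -1;
	parent_id = cmd.out_hwpt_id;

	/* Stage 1: GVA->GPA, nested under the parent hwpt just created. */
	cmd = (struct iommu_hwpt_alloc){
		.size = sizeof(cmd),
		.dev_id = dev_id,
		.pt_id = parent_id,
		.data_type = IOMMU_HWPT_DATA_AMD_V2,
		.data_len = sizeof(data),
		.data_uptr = (uintptr_t)&data,
	};
	return ioctl(fd, IOMMU_HWPT_ALLOC, &cmd);
}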
drivers/iommu/amd/Makefile | 2 +-
drivers/iommu/amd/amd_iommu.h | 10 +++
drivers/iommu/amd/amd_iommu_types.h | 6 ++
drivers/iommu/amd/iommu.c | 96 ++++++++++++++++++++++++++---
drivers/iommu/amd/nested.c | 75 ++++++++++++++++++++++
5 files changed, 181 insertions(+), 8 deletions(-)
create mode 100644 drivers/iommu/amd/nested.c
> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> Sent: Friday, January 12, 2024 8:07 AM
>
> To support nested translation, the parent domain is allocated with the
> flag IOMMU_HWPT_ALLOC_NEST_PARENT and stores information about the v1
> page table for stage 2 (i.e. GPA->SPA), whereas the child domain stores
> information about the GCR3 root pointer table for stage 1 (i.e. GVA->GPA).
put support of NEST_PARENT in a separate patch.
> @@ -569,6 +572,9 @@ struct protection_domain {
> bool dirty_tracking; /* dirty tracking is enabled in the domain */
> unsigned dev_cnt; /* devices assigned to this domain */
> unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
> + struct protection_domain *parent; /* Nested parent domain */
> + u16 guest_paging_mode; /* Guest paging mode */
> + u16 guest_domain_id; /* Guest domain ID */
not used
> +struct iommu_domain *do_iommu_domain_alloc(unsigned int type,
> struct device *dev, u32 flags)
> {
> bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
> @@ -2454,7 +2465,10 @@ static struct iommu_domain *do_iommu_domain_alloc(unsigned int type,
> if (iommu) {
> domain->domain.type = type;
> domain->domain.pgsize_bitmap = iommu->iommu.ops->pgsize_bitmap;
> - domain->domain.ops = iommu->iommu.ops->default_domain_ops;
> + if (type == IOMMU_DOMAIN_NESTED)
> + domain->domain.ops = &nested_domain_ops;
> + else
> + domain->domain.ops = iommu->iommu.ops->default_domain_ops;
>
> if (dirty_tracking)
> domain->domain.dirty_ops = &amd_dirty_ops;
dirty_tracking doesn't apply to a nested domain.
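e.g. the simplest option is to just not install the dirty ops for the
nested type (rejecting the flag combination earlier would be the stricter
alternative); something along these lines, untested:

	if (dirty_tracking && type != IOMMU_DOMAIN_NESTED)
		domain->domain.dirty_ops = &amd_dirty_ops;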
> +
> +static bool check_nested_support(u32 flags)
> +{
> + if (!(flags & IOMMU_HWPT_ALLOC_NEST_PARENT))
> + return true;
It's more readable to put this check in the caller.
> +
> + /*
> + * When allocating a nested parent domain, the device may already
> + * have been attached to a domain. For example, a device is already
> + * attached to the domain allocated by VFIO, which contains the GPA->SPA mapping.
> + * In such a case, return a reference to the same domain.
> + */
> + if (dev_data->domain && nested_parent) {
> + pr_debug("%s: Found existing protection domain id=%#x\n",
> + __func__, dev_data->domain->id);
> + dom = &dev_data->domain->domain;
alloc() shouldn't deal with domain reuse; that decision belongs to the
caller. If the caller wants to reuse, it can try to attach to an existing
domain if compatible. If the caller wants to create a new domain, then
just create one.
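Taken together with the earlier point about moving the NEST_PARENT check
into the caller, the allocation entry point could end up looking roughly
like this (untested sketch; it assumes check_nested_support() loses its
flags argument once the NEST_PARENT test moves out of it, and the
domain-reuse branch is dropped entirely):

static struct iommu_domain *
amd_iommu_domain_alloc_user(struct device *dev, u32 flags,
			    struct iommu_domain *parent,
			    const struct iommu_user_data *user_data)
{
	unsigned int type = IOMMU_DOMAIN_UNMANAGED;

	if (parent) {
		int ret;
		struct iommu_hwpt_amd_v2 hwpt;

		if (parent->ops != amd_iommu_ops.default_domain_ops)
			return ERR_PTR(-EINVAL);

		ret = udata_to_iommu_hwpt_amd_v2(user_data, &hwpt);
		if (ret)
			return ERR_PTR(ret);

		return amd_iommu_nested_domain_alloc(dev, type, flags,
						     &hwpt, parent);
	}

	if (flags & ~amd_iommu_hwpt_supported_flags)
		return ERR_PTR(-EOPNOTSUPP);

	/* Feature check for a nest parent is done here, in the caller */
	if ((flags & IOMMU_HWPT_ALLOC_NEST_PARENT) && !check_nested_support())
		return ERR_PTR(-EOPNOTSUPP);

	/* Always allocate a fresh domain; reuse is the caller's decision */
	return do_iommu_domain_alloc(type, dev, flags);
}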
diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile
--- a/drivers/iommu/amd/Makefile
+++ b/drivers/iommu/amd/Makefile
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o
+obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o nested.o
obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -7,6 +7,7 @@
#ifndef AMD_IOMMU_H
#define AMD_IOMMU_H
+#include <uapi/linux/iommufd.h>
#include <linux/iommu.h>
#include "amd_iommu_types.h"
@@ -182,4 +183,13 @@ void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
struct dev_table_entry *get_dev_table(struct amd_iommu *iommu);
extern bool amd_iommu_snp_en;
+
+/* NESTED */
+struct protection_domain *to_pdomain(struct iommu_domain *dom);
+bool amd_iommu_domain_is_nested(struct protection_domain *pdom);
+struct iommu_domain *
+amd_iommu_nested_domain_alloc(struct device *dev, unsigned int type, u32 flags,
+ struct iommu_hwpt_amd_v2 *hwpt,
+ struct iommu_domain *parent);
+
#endif
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -110,6 +110,8 @@
#define FEATURE_PASMAX_MASK (0x1FULL << FEATURE_PASMAX_SHIFT)
/* Extended Feature 2 Bits */
+#define FEATURE_GCR3TRPMODE BIT_ULL(3)
+
#define FEATURE_SNPAVICSUP_SHIFT 5
#define FEATURE_SNPAVICSUP_MASK (0x07ULL << FEATURE_SNPAVICSUP_SHIFT)
#define FEATURE_SNPAVICSUP_GAM(x) \
@@ -535,6 +537,7 @@ struct amd_irte_ops;
struct gcr3_tbl_info {
u64 *gcr3_tbl; /* Guest CR3 table */
+ u64 trp_gpa; /* Guest CR3 TRP GPA for nested domain */
int glx; /* Number of levels for GCR3 table */
u32 pasid_cnt; /* Track attached PASIDs */
bool giov; /* Track DTE[GIOV] */
@@ -569,6 +572,9 @@ struct protection_domain {
bool dirty_tracking; /* dirty tracking is enabled in the domain */
unsigned dev_cnt; /* devices assigned to this domain */
unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
+ struct protection_domain *parent; /* Nested parent domain */
+ u16 guest_paging_mode; /* Guest paging mode */
+ u16 guest_domain_id; /* Guest domain ID */
};
/*
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -77,11 +77,16 @@ struct iommu_cmd {
struct kmem_cache *amd_iommu_irq_cache;
+static int amd_iommu_attach_device(struct iommu_domain *dom,
+ struct device *dev);
+
static void detach_device(struct device *dev);
static void set_dte_entry(struct amd_iommu *iommu,
struct iommu_dev_data *dev_data);
+static void amd_iommu_domain_free(struct iommu_domain *dom);
+
/****************************************************************************
*
* Helper functions
@@ -191,7 +196,7 @@ static struct amd_iommu *rlookup_amd_iommu(struct device *dev)
return __rlookup_amd_iommu(seg, PCI_SBDF_TO_DEVID(devid));
}
-static struct protection_domain *to_pdomain(struct iommu_domain *dom)
+struct protection_domain *to_pdomain(struct iommu_domain *dom)
{
return container_of(dom, struct protection_domain, domain);
}
@@ -2367,8 +2372,9 @@ static struct protection_domain *protection_domain_alloc(unsigned int type)
domain->nid = NUMA_NO_NODE;
switch (type) {
- /* No need to allocate io pgtable ops in passthrough mode */
+ /* No need to allocate io pgtable ops in passthrough and nested mode */
case IOMMU_DOMAIN_IDENTITY:
+ case IOMMU_DOMAIN_NESTED:
return domain;
case IOMMU_DOMAIN_DMA:
pgtable = amd_iommu_pgtable;
@@ -2423,7 +2429,12 @@ static bool amd_iommu_hd_support(struct amd_iommu *iommu)
return iommu && (iommu->features & FEATURE_HDSUP);
}
-static struct iommu_domain *do_iommu_domain_alloc(unsigned int type,
+static const struct iommu_domain_ops nested_domain_ops = {
+ .attach_dev = amd_iommu_attach_device,
+ .free = amd_iommu_domain_free,
+};
+
+struct iommu_domain *do_iommu_domain_alloc(unsigned int type,
struct device *dev, u32 flags)
{
bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
@@ -2454,7 +2465,10 @@ static struct iommu_domain *do_iommu_domain_alloc(unsigned int type,
if (iommu) {
domain->domain.type = type;
domain->domain.pgsize_bitmap = iommu->iommu.ops->pgsize_bitmap;
- domain->domain.ops = iommu->iommu.ops->default_domain_ops;
+ if (type == IOMMU_DOMAIN_NESTED)
+ domain->domain.ops = &nested_domain_ops;
+ else
+ domain->domain.ops = iommu->iommu.ops->default_domain_ops;
if (dirty_tracking)
domain->domain.dirty_ops = &amd_dirty_ops;
@@ -2474,18 +2488,86 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned int type)
return domain;
}
+static int udata_to_iommu_hwpt_amd_v2(const struct iommu_user_data *user_data,
+ struct iommu_hwpt_amd_v2 *hwpt)
+{
+ if (!user_data)
+ return -EINVAL;
+
+ if (user_data->type != IOMMU_HWPT_DATA_AMD_V2)
+ return -EOPNOTSUPP;
+
+ return iommu_copy_struct_from_user(hwpt, user_data,
+ IOMMU_HWPT_DATA_AMD_V2,
+ __reserved);
+}
+
+static bool check_nested_support(u32 flags)
+{
+ if (!(flags & IOMMU_HWPT_ALLOC_NEST_PARENT))
+ return true;
+
+ if (!check_feature(FEATURE_GT) ||
+ !check_feature(FEATURE_GIOSUP) ||
+ !check_feature2(FEATURE_GCR3TRPMODE))
+ return false;
+
+ return true;
+}
+
+static u32 amd_iommu_hwpt_supported_flags =
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
+ IOMMU_HWPT_ALLOC_NEST_PARENT;
+
static struct iommu_domain *
amd_iommu_domain_alloc_user(struct device *dev, u32 flags,
struct iommu_domain *parent,
const struct iommu_user_data *user_data)
-
{
+ struct iommu_domain *dom;
+ struct iommu_dev_data *dev_data;
unsigned int type = IOMMU_DOMAIN_UNMANAGED;
+ bool nested_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT;
+
+ if (parent) {
+ int ret;
+ struct iommu_hwpt_amd_v2 hwpt;
+
+ if (parent->ops != amd_iommu_ops.default_domain_ops)
+ return ERR_PTR(-EINVAL);
+
+ ret = udata_to_iommu_hwpt_amd_v2(user_data, &hwpt);
+ if (ret)
+ return ERR_PTR(ret);
- if ((flags & ~IOMMU_HWPT_ALLOC_DIRTY_TRACKING) || parent || user_data)
+ return amd_iommu_nested_domain_alloc(dev, type, flags,
+ &hwpt, parent);
+ }
+
+ /* Check supported flags */
+ if ((flags & ~amd_iommu_hwpt_supported_flags) ||
+ !check_nested_support(flags))
return ERR_PTR(-EOPNOTSUPP);
- return do_iommu_domain_alloc(type, dev, flags);
+ dev_data = dev_iommu_priv_get(dev);
+
+ /*
+ * When allocating a nested parent domain, the device may already
+ * have been attached to a domain. For example, a device is already
+ * attached to the domain allocated by VFIO, which contains the GPA->SPA mapping.
+ * In such a case, return a reference to the same domain.
+ */
+ if (dev_data->domain && nested_parent) {
+ pr_debug("%s: Found exist: protection domain id=%#x\n",
+ __func__, dev_data->domain->id);
+ dom = &dev_data->domain->domain;
+ } else {
+ dom = do_iommu_domain_alloc(type, dev, flags);
+ if (IS_ERR(dom))
+ return dom;
+ }
+
+ return dom;
}
static void amd_iommu_domain_free(struct iommu_domain *dom)
diff --git a/drivers/iommu/amd/nested.c b/drivers/iommu/amd/nested.c
new file mode 100644
--- /dev/null
+++ b/drivers/iommu/amd/nested.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 Advanced Micro Devices, Inc.
+ * Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+ */
+
+#define pr_fmt(fmt) "AMD-Vi: " fmt
+#define dev_fmt(fmt) pr_fmt(fmt)
+
+#include <linux/iommu.h>
+#include <uapi/linux/iommufd.h>
+
+#include "amd_iommu.h"
+
+bool amd_iommu_domain_is_nested(struct protection_domain *pdom)
+{
+ return (pdom && pdom->parent != NULL);
+}
+
+static int nested_gcr3_update(struct iommu_hwpt_amd_v2 *hwpt,
+ struct protection_domain *pdom,
+ struct protection_domain *ppdom,
+ struct device *dev)
+{
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
+
+ /* to_pci_dev() never returns NULL, so check the bus type instead */
+ if (!dev_is_pci(dev))
+ return -EINVAL;
+
+ /* Note: Currently only support GCR3TRPMode with nested translation */
+ if (!check_feature2(FEATURE_GCR3TRPMODE))
+ return -EOPNOTSUPP;
+
+ pdom->parent = ppdom;
+ pdom->guest_domain_id = hwpt->gdom_id;
+ pdom->guest_paging_mode = hwpt->flags.guest_paging_mode;
+
+ dev_data->gcr3_info.trp_gpa = hwpt->gcr3;
+ dev_data->gcr3_info.glx = hwpt->flags.glx;
+ dev_data->gcr3_info.giov = hwpt->flags.giov;
+
+ return 0;
+}
+
+struct iommu_domain *do_iommu_domain_alloc(unsigned int type,
+ struct device *dev, u32 flags);
+struct iommu_domain *
+amd_iommu_nested_domain_alloc(struct device *dev, unsigned int type, u32 flags,
+ struct iommu_hwpt_amd_v2 *hwpt,
+ struct iommu_domain *parent)
+{
+ int ret;
+ struct iommu_domain *dom;
+ struct protection_domain *pdom;
+
+ pr_debug("%s: Allocating nested domain with parent domid=%#x\n",
+ __func__, to_pdomain(parent)->id);
+
+ dom = do_iommu_domain_alloc(IOMMU_DOMAIN_NESTED, dev, flags);
+ if (IS_ERR(dom))
+ return dom;
+
+ pdom = to_pdomain(dom);
+ ret = nested_gcr3_update(hwpt, pdom, to_pdomain(parent), dev);
+ if (ret)
+ goto err_out;
+
+ return dom;
+
+err_out:
+ iommu_domain_free(dom);
+ return ERR_PTR(ret);
+}