[RFC,2/5] iommu/vt-d: Add generic IO page table support

Message ID 20231106071226.9656-3-tina.zhang@intel.com
State New
Series virtio-iommu: Add VT-d IO page table

Commit Message

Zhang, Tina Nov. 6, 2023, 7:12 a.m. UTC
  Add basic hook-up code to implement the generic IO page table framework.
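
  For reference, a domain would reach this hook-up through the generic
  framework roughly as follows (a minimal sketch; the calling site is
  illustrative, only the dmar_iop/pgtbl_cfg names come from this patch):

	struct io_pgtable_ops *ops;

	/*
	 * Select the INTEL_IOMMU format added here. This lands in
	 * alloc_pgtable() below and wires map/unmap/iova_to_phys back
	 * to the existing VT-d implementations.
	 */
	ops = alloc_io_pgtable_ops(INTEL_IOMMU,
				   &dmar_domain->dmar_iop.pgtbl_cfg,
				   dmar_domain);
	if (!ops)
		return -ENOMEM;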

Signed-off-by: Tina Zhang <tina.zhang@intel.com>
---
 drivers/iommu/intel/Kconfig |  1 +
 drivers/iommu/intel/iommu.c | 94 +++++++++++++++++++++++++++++++++++++
 drivers/iommu/intel/iommu.h |  7 +++
 drivers/iommu/io-pgtable.c  |  3 ++
 include/linux/io-pgtable.h  |  2 +
 5 files changed, 107 insertions(+)
  

Comments

Jason Gunthorpe Nov. 6, 2023, 7:32 p.m. UTC | #1
On Mon, Nov 06, 2023 at 02:12:23AM -0500, Tina Zhang wrote:
> Add basic hook-up code to implement the generic IO page table framework.
> 
> Signed-off-by: Tina Zhang <tina.zhang@intel.com>
> ---
>  drivers/iommu/intel/Kconfig |  1 +
>  drivers/iommu/intel/iommu.c | 94 +++++++++++++++++++++++++++++++++++++
>  drivers/iommu/intel/iommu.h |  7 +++
>  drivers/iommu/io-pgtable.c  |  3 ++
>  include/linux/io-pgtable.h  |  2 +
>  5 files changed, 107 insertions(+)

If this is going to happen can we also convert vt-d to actually use
the io page table stuff directly and shuffle the code around so it is
structured like the rest of the io page table implementations?

Jason
  
Zhang, Tina Nov. 9, 2023, 12:10 a.m. UTC | #2
Hi Jason,

> -----Original Message-----
> From: Jason Gunthorpe <jgg@ziepe.ca>
> Sent: Tuesday, November 7, 2023 3:32 AM
> To: Zhang, Tina <tina.zhang@intel.com>
> Cc: Jean-Philippe Brucker <jean-philippe@linaro.org>; Tian, Kevin
> <kevin.tian@intel.com>; Lu Baolu <baolu.lu@linux.intel.com>; joro@8bytes.org;
> will@kernel.org; Liu, Yi L <yi.l.liu@intel.com>;
> virtualization@lists.linux-foundation.org; iommu@lists.linux.dev;
> linux-kernel@vger.kernel.org
> Subject: Re: [RFC PATCH 2/5] iommu/vt-d: Add generic IO page table support
> 
> On Mon, Nov 06, 2023 at 02:12:23AM -0500, Tina Zhang wrote:
> > Add basic hook-up code to implement the generic IO page table framework.
> >
> > Signed-off-by: Tina Zhang <tina.zhang@intel.com>
> > ---
> >  drivers/iommu/intel/Kconfig |  1 +
> >  drivers/iommu/intel/iommu.c | 94 +++++++++++++++++++++++++++++++++++++
> >  drivers/iommu/intel/iommu.h |  7 +++
> >  drivers/iommu/io-pgtable.c  |  3 ++
> >  include/linux/io-pgtable.h  |  2 +
> >  5 files changed, 107 insertions(+)
> 
> If this is going to happen can we also convert vt-d to actually use the io page
> table stuff directly and shuffle the code around so it is structured like the rest of
> the io page table implementations?
Converting the VT-d driver to use io-pgtable directly involves a lot of code churn. I made a local version of the conversion, and it didn't show much benefit.

VT-d only supports one 1st-stage IO pgtable format and one 2nd-stage IO pgtable format, so the current page-table handling paths seem more efficient than adding another layer of callbacks on top of them, as the sketch below shows.
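
A sketch of the indirection in question, using names from this patch (the call sites themselves are illustrative):

	struct io_pgtable_ops *ops = &dmar_domain->dmar_iop.iop.ops;

	/* Direct path used today: the core calls the VT-d op directly. */
	intel_iommu_map_pages(&dmar_domain->domain, iova, paddr, pgsize,
			      pgcount, prot, GFP_KERNEL, &mapped);

	/* io-pgtable path: an extra indirect call that only recovers
	 * the dmar_domain from the ops pointer and forwards to the
	 * same intel_iommu_map_pages().
	 */
	ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot,
		       GFP_KERNEL, &mapped);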

We plan to add a new io_pgtable.c file under the VT-d driver directory and collect the IO pgtable related functions there, but we'd like to hold off on converting VT-d to use io-pgtable directly until we can see a clear benefit.

Regards,
-Tina
> 
> Jason
  
Jason Gunthorpe Nov. 9, 2023, 12:33 a.m. UTC | #3
On Thu, Nov 09, 2023 at 12:10:59AM +0000, Zhang, Tina wrote:

> > If this is going to happen can we also convert vt-d to actually use the io page
> > table stuff directly and shuffle the code around so it is structured like the rest of
> > the io page table implementations?

> Converting the VT-d driver to use io-pgtable directly involves a lot
> of code churn. I made a local version of the conversion, and it
> didn't show much benefit.

Well, it structures the code in a similar way to all the other
drivers, though I admit to having not looked closely at the io page
table stuff.
 
> VT-d only supports one 1st-stage IO pgtable format and one 2nd-stage
> IO pgtable format, so the current page-table handling paths seem
> more efficient than adding another layer of callbacks on top of
> them.

I would like to de-virtualize those callbacks; it is completely
pointless when we have per-format iommu_domain ops now.
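
Ie each format can publish its own const ops table, chosen once at
domain allocation time, with no second dispatch layer on the map/unmap
path. Roughly (illustrative names, not from this series):

	static const struct iommu_domain_ops intel_fs_domain_ops = {
		.map_pages	= intel_fs_map_pages,
		.unmap_pages	= intel_fs_unmap_pages,
		.iova_to_phys	= intel_fs_iova_to_phys,
	};

	static const struct iommu_domain_ops intel_ss_domain_ops = {
		.map_pages	= intel_ss_map_pages,
		.unmap_pages	= intel_ss_unmap_pages,
		.iova_to_phys	= intel_ss_iova_to_phys,
	};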

Jason
  

Patch

diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 2e56bd79f589..8334e7e50e69 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -15,6 +15,7 @@  config INTEL_IOMMU
 	select DMA_OPS
 	select IOMMU_API
 	select IOMMU_IOVA
+	select IOMMU_IO_PGTABLE
 	select NEED_DMA_MAP_STATE
 	select DMAR_TABLE
 	select SWIOTLB
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index dbcdf7b95b9f..80bd1993861c 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -23,6 +23,7 @@ 
 #include <linux/syscore_ops.h>
 #include <linux/tboot.h>
 #include <uapi/linux/iommufd.h>
+#include <linux/io-pgtable.h>
 
 #include "iommu.h"
 #include "../dma-iommu.h"
@@ -67,6 +68,20 @@ 
 #define LEVEL_STRIDE		(9)
 #define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)
 
+#define io_pgtable_cfg_to_dmar_pgtable(x) \
+	container_of((x), struct dmar_io_pgtable, pgtbl_cfg)
+
+#define io_pgtable_to_dmar_pgtable(x) \
+	container_of((x), struct dmar_io_pgtable, iop)
+
+#define io_pgtable_to_dmar_domain(x) \
+	container_of(io_pgtable_to_dmar_pgtable(x), \
+		struct dmar_domain, dmar_iop)
+
+#define io_pgtable_ops_to_dmar_domain(x) \
+	container_of(io_pgtable_to_dmar_pgtable(io_pgtable_ops_to_pgtable(x)), \
+		struct dmar_domain, dmar_iop)
+
 static inline int agaw_to_level(int agaw)
 {
 	return agaw + 2;
@@ -5171,3 +5186,82 @@  int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob)
 
 	return ret;
 }
+
+static void flush_all(void *cookie)
+{
+}
+
+static void flush_walk(unsigned long iova, size_t size,
+		       size_t granule, void *cookie)
+{
+}
+
+static void add_page(struct iommu_iotlb_gather *gather,
+		     unsigned long iova, size_t granule,
+		     void *cookie)
+{
+}
+
+static const struct iommu_flush_ops flush_ops = {
+	.tlb_flush_all  = flush_all,
+	.tlb_flush_walk = flush_walk,
+	.tlb_add_page   = add_page,
+};
+
+static void free_pgtable(struct io_pgtable *iop)
+{
+	struct dmar_domain *dmar_domain = io_pgtable_to_dmar_domain(iop);
+
+	if (dmar_domain->pgd) {
+		LIST_HEAD(freelist);
+
+		domain_unmap(dmar_domain, 0, DOMAIN_MAX_PFN(dmar_domain->gaw), &freelist);
+		put_pages_list(&freelist);
+	}
+}
+
+static int pgtable_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
+			     phys_addr_t paddr, size_t pgsize, size_t pgcount,
+			     int iommu_prot, gfp_t gfp, size_t *mapped)
+{
+	struct dmar_domain *dmar_domain = io_pgtable_ops_to_dmar_domain(ops);
+
+	return intel_iommu_map_pages(&dmar_domain->domain, iova, paddr, pgsize,
+				     pgcount, iommu_prot, gfp, mapped);
+}
+
+static size_t pgtable_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova,
+				  size_t pgsize, size_t pgcount,
+				  struct iommu_iotlb_gather *gather)
+{
+	struct dmar_domain *dmar_domain = io_pgtable_ops_to_dmar_domain(ops);
+
+	return intel_iommu_unmap_pages(&dmar_domain->domain, iova, pgsize,
+				       pgcount, gather);
+}
+
+static phys_addr_t pgtable_iova_to_phys(struct io_pgtable_ops *ops,
+					unsigned long iova)
+{
+	struct dmar_domain *dmar_domain = io_pgtable_ops_to_dmar_domain(ops);
+
+	return intel_iommu_iova_to_phys(&dmar_domain->domain, iova);
+}
+
+static struct io_pgtable *alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
+{
+	struct dmar_io_pgtable *pgtable = io_pgtable_cfg_to_dmar_pgtable(cfg);
+
+	pgtable->iop.ops.map_pages = pgtable_map_pages;
+	pgtable->iop.ops.unmap_pages = pgtable_unmap_pages;
+	pgtable->iop.ops.iova_to_phys = pgtable_iova_to_phys;
+
+	cfg->tlb = &flush_ops;
+
+	return &pgtable->iop;
+}
+
+struct io_pgtable_init_fns io_pgtable_intel_iommu_init_fns = {
+	.alloc = alloc_pgtable,
+	.free  = free_pgtable,
+};
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 8d0aac71c135..5207fea6477a 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -18,6 +18,7 @@ 
 #include <linux/list.h>
 #include <linux/iommu.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/io-pgtable.h>
 #include <linux/dmar.h>
 #include <linux/bitfield.h>
 #include <linux/xarray.h>
@@ -579,6 +580,11 @@  struct iommu_domain_info {
 					 * to VT-d spec, section 9.3 */
 };
 
+struct dmar_io_pgtable {
+	struct io_pgtable_cfg   pgtbl_cfg;
+	struct io_pgtable       iop;
+};
+
 struct dmar_domain {
 	int	nid;			/* node id */
 	struct xarray iommu_array;	/* Attached IOMMU array */
@@ -633,6 +639,7 @@  struct dmar_domain {
 
 	struct iommu_domain domain;	/* generic domain data structure for
 					   iommu core */
+	struct dmar_io_pgtable dmar_iop;
 };
 
 /*
diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c
index 5755dee96a68..533b27557290 100644
--- a/drivers/iommu/io-pgtable.c
+++ b/drivers/iommu/io-pgtable.c
@@ -35,6 +35,9 @@  io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = {
 #ifdef CONFIG_IOMMU_IO_PGTABLE_VIRT
 	[VIRT_IO_PGTABLE] = &io_pgtable_virt_init_fns,
 #endif
+#ifdef CONFIG_INTEL_IOMMU
+	[INTEL_IOMMU] = &io_pgtable_intel_iommu_init_fns,
+#endif
 };
 
 struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt,
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index bdcade2c4844..b2857c18f963 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -20,6 +20,7 @@  enum io_pgtable_fmt {
 	APPLE_DART,
 	APPLE_DART2,
 	VIRT_IO_PGTABLE,
+	INTEL_IOMMU,
 	IO_PGTABLE_NUM_FMTS,
 };
 
@@ -281,5 +282,6 @@  extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns;
 extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns;
 extern struct io_pgtable_init_fns io_pgtable_apple_dart_init_fns;
 extern struct io_pgtable_init_fns io_pgtable_virt_init_fns;
+extern struct io_pgtable_init_fns io_pgtable_intel_iommu_init_fns;
 
 #endif /* __IO_PGTABLE_H */