vfio/cdx: add support for CDX bus

Message ID 20230403142525.29494-1-nipun.gupta@amd.com
State New
Headers
Series vfio/cdx: add support for CDX bus |

Commit Message

Gupta, Nipun April 3, 2023, 2:25 p.m. UTC
  vfio-cdx driver enables IOCTLs for user space to query
MMIO regions for CDX devices and mmap them. This change
also adds support for reset of CDX devices.

This change adds the VFIO CDX driver and enables the following
ioctls for CDX devices:
 - VFIO_DEVICE_GET_INFO:
 - VFIO_DEVICE_GET_REGION_INFO
 - VFIO_DEVICE_RESET

Signed-off-by: Nipun Gupta <nipun.gupta@amd.com>
Reviewed-by: Pieter Jansen van Vuuren <pieter.jansen-van-vuuren@amd.com>
Tested-by: Nikhil Agarwal <nikhil.agarwal@amd.com>
---

CDX bus is now merged on linux-next tree. So sending VFIO driver
patch for CDX bus.

 MAINTAINERS                         |   1 +
 drivers/vfio/Kconfig                |   1 +
 drivers/vfio/Makefile               |   1 +
 drivers/vfio/cdx/Kconfig            |  17 ++
 drivers/vfio/cdx/Makefile           |   8 +
 drivers/vfio/cdx/vfio_cdx.c         | 314 ++++++++++++++++++++++++++++
 drivers/vfio/cdx/vfio_cdx_private.h |  32 +++
 7 files changed, 374 insertions(+)
 create mode 100644 drivers/vfio/cdx/Kconfig
 create mode 100644 drivers/vfio/cdx/Makefile
 create mode 100644 drivers/vfio/cdx/vfio_cdx.c
 create mode 100644 drivers/vfio/cdx/vfio_cdx_private.h
  

Comments

Alex Williamson April 3, 2023, 9:28 p.m. UTC | #1
On Mon, 3 Apr 2023 19:55:25 +0530
Nipun Gupta <nipun.gupta@amd.com> wrote:
> diff --git a/drivers/vfio/cdx/Makefile b/drivers/vfio/cdx/Makefile
> new file mode 100644
> index 000000000000..82e4ef412c0f
> --- /dev/null
> +++ b/drivers/vfio/cdx/Makefile
...
> +static int vfio_cdx_mmap_mmio(struct vfio_cdx_region region,
> +			      struct vm_area_struct *vma)
> +{
> +	u64 size = vma->vm_end - vma->vm_start;
> +	u64 pgoff, base;
> +
> +	pgoff = vma->vm_pgoff &
> +		((1U << (VFIO_CDX_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
> +	base = pgoff << PAGE_SHIFT;
> +
> +	if (region.size < PAGE_SIZE || base + size > region.size)
> +		return -EINVAL;
> +
> +	vma->vm_pgoff = (region.addr >> PAGE_SHIFT) + pgoff;
> +	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
> +
> +	return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
> +			       size, vma->vm_page_prot);
> +}
> +
> +static int vfio_cdx_mmap(struct vfio_device *core_vdev,
> +			 struct vm_area_struct *vma)
> +{
> +	struct vfio_cdx_device *vdev =
> +		container_of(core_vdev, struct vfio_cdx_device, vdev);
> +	struct cdx_device *cdx_dev = vdev->cdx_dev;
> +	unsigned int index;
> +
> +	index = vma->vm_pgoff >> (VFIO_CDX_OFFSET_SHIFT - PAGE_SHIFT);
> +
> +	if (vma->vm_end < vma->vm_start)
> +		return -EINVAL;
> +	if (vma->vm_start & ~PAGE_MASK)
> +		return -EINVAL;
> +	if (vma->vm_end & ~PAGE_MASK)
> +		return -EINVAL;
> +	if (!(vma->vm_flags & VM_SHARED))
> +		return -EINVAL;
> +	if (index >= cdx_dev->res_count)
> +		return -EINVAL;
> +
> +	if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_MMAP))
> +		return -EINVAL;
> +
> +	if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_READ) &&
> +	    (vma->vm_flags & VM_READ))
> +		return -EINVAL;
> +
> +	if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_WRITE) &&
> +	    (vma->vm_flags & VM_WRITE))
> +		return -EINVAL;
> +
> +	vma->vm_private_data = cdx_dev;
> +
> +	return vfio_cdx_mmap_mmio(vdev->regions[index], vma);
> +}

I see discussion of MMIO_REGIONS_ENABLE controlling host access to the
device in mc_cdx_pcol.h.  Is a user of vfio-cdx able to manipulate
whether MMIO space of the device is enabled?  If so, what's the system
response to accessing MMIO through the mmap while disabled?  Is MMIO
space accessible even through calling the RESET ioctl?  Is there a
public specification somewhere for CDX?  Thanks,

Alex
  
Gupta, Nipun April 4, 2023, 2:51 p.m. UTC | #2
On 4/4/2023 2:58 AM, Alex Williamson wrote:
> Caution: This message originated from an External Source. Use proper caution when opening attachments, clicking links, or responding.
> 
> 
> On Mon, 3 Apr 2023 19:55:25 +0530
> Nipun Gupta <nipun.gupta@amd.com> wrote:
>> diff --git a/drivers/vfio/cdx/Makefile b/drivers/vfio/cdx/Makefile
>> new file mode 100644
>> index 000000000000..82e4ef412c0f
>> --- /dev/null
>> +++ b/drivers/vfio/cdx/Makefile
> ...
>> +static int vfio_cdx_mmap_mmio(struct vfio_cdx_region region,
>> +                           struct vm_area_struct *vma)
>> +{
>> +     u64 size = vma->vm_end - vma->vm_start;
>> +     u64 pgoff, base;
>> +
>> +     pgoff = vma->vm_pgoff &
>> +             ((1U << (VFIO_CDX_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
>> +     base = pgoff << PAGE_SHIFT;
>> +
>> +     if (region.size < PAGE_SIZE || base + size > region.size)
>> +             return -EINVAL;
>> +
>> +     vma->vm_pgoff = (region.addr >> PAGE_SHIFT) + pgoff;
>> +     vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
>> +
>> +     return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
>> +                            size, vma->vm_page_prot);
>> +}
>> +
>> +static int vfio_cdx_mmap(struct vfio_device *core_vdev,
>> +                      struct vm_area_struct *vma)
>> +{
>> +     struct vfio_cdx_device *vdev =
>> +             container_of(core_vdev, struct vfio_cdx_device, vdev);
>> +     struct cdx_device *cdx_dev = vdev->cdx_dev;
>> +     unsigned int index;
>> +
>> +     index = vma->vm_pgoff >> (VFIO_CDX_OFFSET_SHIFT - PAGE_SHIFT);
>> +
>> +     if (vma->vm_end < vma->vm_start)
>> +             return -EINVAL;
>> +     if (vma->vm_start & ~PAGE_MASK)
>> +             return -EINVAL;
>> +     if (vma->vm_end & ~PAGE_MASK)
>> +             return -EINVAL;
>> +     if (!(vma->vm_flags & VM_SHARED))
>> +             return -EINVAL;
>> +     if (index >= cdx_dev->res_count)
>> +             return -EINVAL;
>> +
>> +     if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_MMAP))
>> +             return -EINVAL;
>> +
>> +     if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_READ) &&
>> +         (vma->vm_flags & VM_READ))
>> +             return -EINVAL;
>> +
>> +     if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_WRITE) &&
>> +         (vma->vm_flags & VM_WRITE))
>> +             return -EINVAL;
>> +
>> +     vma->vm_private_data = cdx_dev;
>> +
>> +     return vfio_cdx_mmap_mmio(vdev->regions[index], vma);
>> +}
> 
> I see discussion of MMIO_REGIONS_ENABLE controlling host access to the
> device in mc_cdx_pcol.h.  Is a user of vfio-cdx able to manipulate
> whether MMIO space of the device is enabled?  If so, what's the system
> response to accessing MMIO through the mmap while disabled?

The MMIO enable/disable has been added in mc_cdx_pcol.h from the future 
support perspective, but it is not currently supported, as all the CDX 
devices have the MMIO enable flag permanently set which cannot be 
disabled. Due to this we have not added any interface/API in the CDX bus 
to disable MMIO for the device. This is still under discussion and 
future patches will add complete support for this.

That said, if required we can add a flag currently in the "cdx_device" 
which will be set when for MMIO enabled (it would be by default enabled 
for now), and depending on this flag VFIO can return error during mmap, 
but we would prefer to defer it to be added along with the complete 
support for MMIO enable/disable in the CDX bus.

> Is MMIO
> space accessible even through calling the RESET ioctl?

Yes, MMIO region would be accessible even through calling reset, but 
user may not see the correct values as the device is being reset.


> Is there a
> public specification somewhere for CDX?  Thanks,

I am afraid there is no public specification for CDX as of now.

Thanks,
Nipun

> 
> Alex
>
  
Jason Gunthorpe April 5, 2023, 3:33 p.m. UTC | #3
On Mon, Apr 03, 2023 at 07:55:25PM +0530, Nipun Gupta wrote:

> +enum {
> +	CDX_ID_F_VFIO_DRIVER_OVERRIDE = 1,
> +};

This seems to be missing the file2alias part.

> +static void vfio_cdx_regions_cleanup(struct vfio_cdx_device *vdev)
> +{
> +	kfree(vdev->regions);
> +}
> +
> +static int vfio_cdx_reset_device(struct vfio_cdx_device *vdev)
> +{
> +	return cdx_dev_reset(&vdev->cdx_dev->dev);
> +}

Wrapper functions should be avoided.

> +static void vfio_cdx_close_device(struct vfio_device *core_vdev)
> +{
> +	struct vfio_cdx_device *vdev =
> +		container_of(core_vdev, struct vfio_cdx_device, vdev);
> +	int ret;
> +
> +	vfio_cdx_regions_cleanup(vdev);
> +
> +	/* reset the device before cleaning up the interrupts */
> +	ret = vfio_cdx_reset_device(vdev);
> +	if (WARN_ON(ret))
> +		dev_warn(core_vdev->dev,
> +			 "VFIO_CDX: reset device has failed (%d)\n", ret);

This is pretty problematic.. if the reset can fail the device is
returned to the system in an unknown state and it seems pretty likely
that it can be a way to attack the kernel.

> +	case VFIO_DEVICE_RESET:
> +	{
> +		return vfio_cdx_reset_device(vdev);
> +	}

What happens to MMIO access during this reset?

> +static int vfio_cdx_mmap_mmio(struct vfio_cdx_region region,
> +			      struct vm_area_struct *vma)
> +{
> +	u64 size = vma->vm_end - vma->vm_start;
> +	u64 pgoff, base;
> +
> +	pgoff = vma->vm_pgoff &
> +		((1U << (VFIO_CDX_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
> +	base = pgoff << PAGE_SHIFT;
> +
> +	if (region.size < PAGE_SIZE || base + size > region.size)
> +		return -EINVAL;
> +
> +	vma->vm_pgoff = (region.addr >> PAGE_SHIFT) + pgoff;
> +	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

pgprot_device

> +	return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
> +			       size, vma->vm_page_prot);

io_remap_pfn_range
> +static int vfio_cdx_mmap(struct vfio_device *core_vdev,
> +			 struct vm_area_struct *vma)
> +{
> +	struct vfio_cdx_device *vdev =
> +		container_of(core_vdev, struct vfio_cdx_device, vdev);
> +	struct cdx_device *cdx_dev = vdev->cdx_dev;
> +	unsigned int index;
> +
> +	index = vma->vm_pgoff >> (VFIO_CDX_OFFSET_SHIFT - PAGE_SHIFT);
> +
> +	if (vma->vm_end < vma->vm_start)
> +		return -EINVAL;
> +	if (vma->vm_start & ~PAGE_MASK)
> +		return -EINVAL;
> +	if (vma->vm_end & ~PAGE_MASK)
> +		return -EINVAL;

The core code already assures these checks.

> +	if (!(vma->vm_flags & VM_SHARED))
> +		return -EINVAL;
> +	if (index >= cdx_dev->res_count)
> +		return -EINVAL;
> +
> +	if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_MMAP))
> +		return -EINVAL;
> +
> +	if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_READ) &&
> +	    (vma->vm_flags & VM_READ))
> +		return -EINVAL;
> +
> +	if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_WRITE) &&
> +	    (vma->vm_flags & VM_WRITE))
> +		return -EINVAL;
> +
> +	vma->vm_private_data = cdx_dev;

not needed

> diff --git a/drivers/vfio/cdx/vfio_cdx_private.h b/drivers/vfio/cdx/vfio_cdx_private.h
> new file mode 100644
> index 000000000000..8b6f1ee3f5cd
> --- /dev/null
> +++ b/drivers/vfio/cdx/vfio_cdx_private.h
> @@ -0,0 +1,32 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2022-2023, Advanced Micro Devices, Inc.
> + */
> +
> +#ifndef VFIO_CDX_PRIVATE_H
> +#define VFIO_CDX_PRIVATE_H
> +
> +#define VFIO_CDX_OFFSET_SHIFT    40
> +#define VFIO_CDX_OFFSET_MASK (((u64)(1) << VFIO_CDX_OFFSET_SHIFT) - 1)
> +
> +#define VFIO_CDX_OFFSET_TO_INDEX(off) ((off) >> VFIO_CDX_OFFSET_SHIFT)
> +
> +#define VFIO_CDX_INDEX_TO_OFFSET(index)	\
> +	((u64)(index) << VFIO_CDX_OFFSET_SHIFT)

use static inlines for function-line macros

> +struct vfio_cdx_region {
> +	u32			flags;
> +	u32			type;
> +	u64			addr;
> +	resource_size_t		size;
> +	void __iomem		*ioaddr;
> +};
> +
> +struct vfio_cdx_device {
> +	struct vfio_device	vdev;
> +	struct cdx_device	*cdx_dev;
> +	struct device		*dev;
> +	struct vfio_cdx_region	*regions;
> +};

This header file does not seem necessary right now

Jason
  
Gupta, Nipun April 7, 2023, 5:03 a.m. UTC | #4
On 4/5/2023 9:03 PM, Jason Gunthorpe wrote:
> Caution: This message originated from an External Source. Use proper caution when opening attachments, clicking links, or responding.
> 
> 
> On Mon, Apr 03, 2023 at 07:55:25PM +0530, Nipun Gupta wrote:
> 
>> +enum {
>> +     CDX_ID_F_VFIO_DRIVER_OVERRIDE = 1,
>> +};
> 
> This seems to be missing the file2alias part.

Sure. Will add this.

> 
>> +static void vfio_cdx_regions_cleanup(struct vfio_cdx_device *vdev)
>> +{
>> +     kfree(vdev->regions);
>> +}
>> +
>> +static int vfio_cdx_reset_device(struct vfio_cdx_device *vdev)
>> +{
>> +     return cdx_dev_reset(&vdev->cdx_dev->dev);
>> +}
> 
> Wrapper functions should be avoided.

Agree

> 
>> +static void vfio_cdx_close_device(struct vfio_device *core_vdev)
>> +{
>> +     struct vfio_cdx_device *vdev =
>> +             container_of(core_vdev, struct vfio_cdx_device, vdev);
>> +     int ret;
>> +
>> +     vfio_cdx_regions_cleanup(vdev);
>> +
>> +     /* reset the device before cleaning up the interrupts */
>> +     ret = vfio_cdx_reset_device(vdev);
>> +     if (WARN_ON(ret))
>> +             dev_warn(core_vdev->dev,
>> +                      "VFIO_CDX: reset device has failed (%d)\n", ret);
> 
> This is pretty problematic.. if the reset can fail the device is
> returned to the system in an unknown state and it seems pretty likely
> that it can be a way to attack the kernel.

We will update the code to disable the device in case of failures.

> 
>> +     case VFIO_DEVICE_RESET:
>> +     {
>> +             return vfio_cdx_reset_device(vdev);
>> +     }
> 
> What happens to MMIO access during this reset?

There is no config space here; and access to mmap space will go as usual 
but user may not see the expected behavior as the device is being reset. 
It is responsibility of user of VFIO device to serialize such access and 
reset.

> 
>> +static int vfio_cdx_mmap_mmio(struct vfio_cdx_region region,
>> +                           struct vm_area_struct *vma)
>> +{
>> +     u64 size = vma->vm_end - vma->vm_start;
>> +     u64 pgoff, base;
>> +
>> +     pgoff = vma->vm_pgoff &
>> +             ((1U << (VFIO_CDX_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
>> +     base = pgoff << PAGE_SHIFT;
>> +
>> +     if (region.size < PAGE_SIZE || base + size > region.size)
>> +             return -EINVAL;
>> +
>> +     vma->vm_pgoff = (region.addr >> PAGE_SHIFT) + pgoff;
>> +     vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
> 
> pgprot_device

Will update..

> 
>> +     return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
>> +                            size, vma->vm_page_prot);
> 
> io_remap_pfn_range
>> +static int vfio_cdx_mmap(struct vfio_device *core_vdev,
>> +                      struct vm_area_struct *vma)
>> +{
>> +     struct vfio_cdx_device *vdev =
>> +             container_of(core_vdev, struct vfio_cdx_device, vdev);
>> +     struct cdx_device *cdx_dev = vdev->cdx_dev;
>> +     unsigned int index;
>> +
>> +     index = vma->vm_pgoff >> (VFIO_CDX_OFFSET_SHIFT - PAGE_SHIFT);
>> +
>> +     if (vma->vm_end < vma->vm_start)
>> +             return -EINVAL;
>> +     if (vma->vm_start & ~PAGE_MASK)
>> +             return -EINVAL;
>> +     if (vma->vm_end & ~PAGE_MASK)
>> +             return -EINVAL;
> 
> The core code already assures these checks.

Sure will remove these unnecessary checks.

> 
>> +     if (!(vma->vm_flags & VM_SHARED))
>> +             return -EINVAL;
>> +     if (index >= cdx_dev->res_count)
>> +             return -EINVAL;
>> +
>> +     if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_MMAP))
>> +             return -EINVAL;
>> +
>> +     if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_READ) &&
>> +         (vma->vm_flags & VM_READ))
>> +             return -EINVAL;
>> +
>> +     if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_WRITE) &&
>> +         (vma->vm_flags & VM_WRITE))
>> +             return -EINVAL;
>> +
>> +     vma->vm_private_data = cdx_dev;
> 
> not needed

Okay

> 
>> diff --git a/drivers/vfio/cdx/vfio_cdx_private.h b/drivers/vfio/cdx/vfio_cdx_private.h
>> new file mode 100644
>> index 000000000000..8b6f1ee3f5cd
>> --- /dev/null
>> +++ b/drivers/vfio/cdx/vfio_cdx_private.h
>> @@ -0,0 +1,32 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * Copyright (C) 2022-2023, Advanced Micro Devices, Inc.
>> + */
>> +
>> +#ifndef VFIO_CDX_PRIVATE_H
>> +#define VFIO_CDX_PRIVATE_H
>> +
>> +#define VFIO_CDX_OFFSET_SHIFT    40
>> +#define VFIO_CDX_OFFSET_MASK (((u64)(1) << VFIO_CDX_OFFSET_SHIFT) - 1)
>> +
>> +#define VFIO_CDX_OFFSET_TO_INDEX(off) ((off) >> VFIO_CDX_OFFSET_SHIFT)
>> +
>> +#define VFIO_CDX_INDEX_TO_OFFSET(index)      \
>> +     ((u64)(index) << VFIO_CDX_OFFSET_SHIFT)
> 
> use static inlines for function-line macros

Okay. Will update the code.

> 
>> +struct vfio_cdx_region {
>> +     u32                     flags;
>> +     u32                     type;
>> +     u64                     addr;
>> +     resource_size_t         size;
>> +     void __iomem            *ioaddr;
>> +};
>> +
>> +struct vfio_cdx_device {
>> +     struct vfio_device      vdev;
>> +     struct cdx_device       *cdx_dev;
>> +     struct device           *dev;
>> +     struct vfio_cdx_region  *regions;
>> +};
> 
> This header file does not seem necessary right now

There is work in progress which would add support of MSI in VFIO-CDX 
driver, and where would be additional file/s. So we would like to keep 
the header file separate.

Thanks,
Nipun

> 
> Jason
  
Gupta, Nipun April 13, 2023, 1:05 p.m. UTC | #5
> -----Original Message-----
> From: Gupta, Nipun
> Sent: Friday, April 7, 2023 10:35 AM
> To: Jason Gunthorpe <jgg@ziepe.ca>
> Cc: alex.williamson@redhat.com; linux-kernel@vger.kernel.org;
> kvm@vger.kernel.org; git (AMD-Xilinx) <git@amd.com>; Anand, Harpreet
> <harpreet.anand@amd.com>; Simek, Michal <michal.simek@amd.com>;
> Agarwal, Nikhil <nikhil.agarwal@amd.com>; Jansen Van Vuuren, Pieter
> <pieter.jansen-van-vuuren@amd.com>; okaya@kernel.org
> Subject: Re: [PATCH] vfio/cdx: add support for CDX bus
> 

<snip>

> 
> >
> >> +static void vfio_cdx_close_device(struct vfio_device *core_vdev)
> >> +{
> >> +     struct vfio_cdx_device *vdev =
> >> +             container_of(core_vdev, struct vfio_cdx_device, vdev);
> >> +     int ret;
> >> +
> >> +     vfio_cdx_regions_cleanup(vdev);
> >> +
> >> +     /* reset the device before cleaning up the interrupts */
> >> +     ret = vfio_cdx_reset_device(vdev);
> >> +     if (WARN_ON(ret))
> >> +             dev_warn(core_vdev->dev,
> >> +                      "VFIO_CDX: reset device has failed (%d)\n", ret);
> >
> > This is pretty problematic.. if the reset can fail the device is
> > returned to the system in an unknown state and it seems pretty likely
> > that it can be a way to attack the kernel.
> 
> We will update the code to disable the device in case of failures.

We double checked with firmware/hardware team, when driver sends CDX
device reset command to firmware, it actually quiesce and then reset the device.
So, the device remains disabled in case of failures.

Regards,
Nipun
  

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 99b665e85f0a..2acb3dbff174 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -970,6 +970,7 @@  M:	Nikhil Agarwal <nikhil.agarwal@amd.com>
 S:	Maintained
 F:	Documentation/devicetree/bindings/bus/xlnx,versal-net-cdx.yaml
 F:	drivers/cdx/*
+F:	drivers/vfio/cdx/*
 F:	include/linux/cdx/*
 
 AMD CRYPTOGRAPHIC COPROCESSOR (CCP) DRIVER
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index 89e06c981e43..aba36f5be4ec 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -57,6 +57,7 @@  source "drivers/vfio/pci/Kconfig"
 source "drivers/vfio/platform/Kconfig"
 source "drivers/vfio/mdev/Kconfig"
 source "drivers/vfio/fsl-mc/Kconfig"
+source "drivers/vfio/cdx/Kconfig"
 endif
 
 source "virt/lib/Kconfig"
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index 70e7dcb302ef..1a27b2516612 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -14,3 +14,4 @@  obj-$(CONFIG_VFIO_PCI) += pci/
 obj-$(CONFIG_VFIO_PLATFORM) += platform/
 obj-$(CONFIG_VFIO_MDEV) += mdev/
 obj-$(CONFIG_VFIO_FSL_MC) += fsl-mc/
+obj-$(CONFIG_VFIO_CDX) += cdx/
diff --git a/drivers/vfio/cdx/Kconfig b/drivers/vfio/cdx/Kconfig
new file mode 100644
index 000000000000..e6de0a0caa32
--- /dev/null
+++ b/drivers/vfio/cdx/Kconfig
@@ -0,0 +1,17 @@ 
+# SPDX-License-Identifier: GPL-2.0
+#
+# VFIO CDX configuration
+#
+# Copyright (C) 2022-2023, Advanced Micro Devices, Inc.
+#
+
+config VFIO_CDX
+	tristate "VFIO support for CDX bus devices"
+	depends on CDX_BUS
+	select EVENTFD
+	help
+	  Driver to enable VFIO support for the devices on CDX bus.
+	  This is required to make use of CDX devices present in
+	  the system using the VFIO framework.
+
+	  If you don't know what to do here, say N.
diff --git a/drivers/vfio/cdx/Makefile b/drivers/vfio/cdx/Makefile
new file mode 100644
index 000000000000..82e4ef412c0f
--- /dev/null
+++ b/drivers/vfio/cdx/Makefile
@@ -0,0 +1,8 @@ 
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2022-2023, Advanced Micro Devices, Inc.
+#
+
+obj-$(CONFIG_VFIO_CDX) += vfio-cdx.o
+
+vfio-cdx-objs := vfio_cdx.o
diff --git a/drivers/vfio/cdx/vfio_cdx.c b/drivers/vfio/cdx/vfio_cdx.c
new file mode 100644
index 000000000000..c23be3e06495
--- /dev/null
+++ b/drivers/vfio/cdx/vfio_cdx.c
@@ -0,0 +1,314 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2023, Advanced Micro Devices, Inc.
+ */
+
+#include <linux/device.h>
+#include <linux/iommu.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/vfio.h>
+#include <linux/cdx/cdx_bus.h>
+#include <linux/delay.h>
+#include <linux/io-64-nonatomic-hi-lo.h>
+
+#include "vfio_cdx_private.h"
+
+static struct cdx_driver vfio_cdx_driver;
+
+enum {
+	CDX_ID_F_VFIO_DRIVER_OVERRIDE = 1,
+};
+
+static int vfio_cdx_init_device(struct vfio_device *core_vdev)
+{
+	struct vfio_cdx_device *vdev =
+		container_of(core_vdev, struct vfio_cdx_device, vdev);
+	struct cdx_device *cdx_dev = to_cdx_device(core_vdev->dev);
+
+	vdev->cdx_dev = cdx_dev;
+	vdev->dev = &cdx_dev->dev;
+
+	return 0;
+}
+
+static void vfio_cdx_release_device(struct vfio_device *core_vdev)
+{
+	struct vfio_cdx_device *vdev =
+		container_of(core_vdev, struct vfio_cdx_device, vdev);
+
+	vdev->cdx_dev = NULL;
+	vdev->dev = NULL;
+}
+
+/**
+ * CDX_DRIVER_OVERRIDE_DEVICE_VFIO - macro used to describe a VFIO
+ *                                   "driver_override" CDX device.
+ * @vend: the 16 bit CDX Vendor ID
+ * @dev: the 16 bit CDX Device ID
+ *
+ * This macro is used to create a struct cdx_device_id that matches a
+ * specific device. driver_override will be set to
+ * CDX_ID_F_VFIO_DRIVER_OVERRIDE.
+ */
+#define CDX_DRIVER_OVERRIDE_DEVICE_VFIO(vend, dev) \
+	CDX_DEVICE_DRIVER_OVERRIDE(vend, dev, CDX_ID_F_VFIO_DRIVER_OVERRIDE)
+
+static int vfio_cdx_open_device(struct vfio_device *core_vdev)
+{
+	struct vfio_cdx_device *vdev =
+		container_of(core_vdev, struct vfio_cdx_device, vdev);
+	struct cdx_device *cdx_dev = vdev->cdx_dev;
+	int count = cdx_dev->res_count;
+	int i;
+
+	vdev->regions = kcalloc(count, sizeof(struct vfio_cdx_region),
+				GFP_KERNEL);
+	if (!vdev->regions)
+		return -ENOMEM;
+
+	for (i = 0; i < count; i++) {
+		struct resource *res = &cdx_dev->res[i];
+
+		vdev->regions[i].addr = res->start;
+		vdev->regions[i].size = resource_size(res);
+		vdev->regions[i].type = res->flags;
+		/*
+		 * Only regions addressed with PAGE granularity may be
+		 * MMAP'ed securely.
+		 */
+		if (!(vdev->regions[i].addr & ~PAGE_MASK) &&
+		    !(vdev->regions[i].size & ~PAGE_MASK))
+			vdev->regions[i].flags |=
+					VFIO_REGION_INFO_FLAG_MMAP;
+		vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_READ;
+		if (!(cdx_dev->res[i].flags & IORESOURCE_READONLY))
+			vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_WRITE;
+	}
+
+	return 0;
+}
+
+static void vfio_cdx_regions_cleanup(struct vfio_cdx_device *vdev)
+{
+	kfree(vdev->regions);
+}
+
+static int vfio_cdx_reset_device(struct vfio_cdx_device *vdev)
+{
+	return cdx_dev_reset(&vdev->cdx_dev->dev);
+}
+
+static void vfio_cdx_close_device(struct vfio_device *core_vdev)
+{
+	struct vfio_cdx_device *vdev =
+		container_of(core_vdev, struct vfio_cdx_device, vdev);
+	int ret;
+
+	vfio_cdx_regions_cleanup(vdev);
+
+	/* reset the device before cleaning up the interrupts */
+	ret = vfio_cdx_reset_device(vdev);
+	if (WARN_ON(ret))
+		dev_warn(core_vdev->dev,
+			 "VFIO_CDX: reset device has failed (%d)\n", ret);
+}
+
+static long vfio_cdx_ioctl(struct vfio_device *core_vdev,
+			   unsigned int cmd, unsigned long arg)
+{
+	struct vfio_cdx_device *vdev =
+		container_of(core_vdev, struct vfio_cdx_device, vdev);
+	struct cdx_device *cdx_dev = vdev->cdx_dev;
+	unsigned long minsz;
+
+	switch (cmd) {
+	case VFIO_DEVICE_GET_INFO:
+	{
+		struct vfio_device_info info;
+
+		minsz = offsetofend(struct vfio_device_info, num_irqs);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		info.flags = VFIO_DEVICE_FLAGS_RESET;
+
+		info.num_regions = cdx_dev->res_count;
+		info.num_irqs = 0;
+
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
+	}
+	case VFIO_DEVICE_GET_REGION_INFO:
+	{
+		struct vfio_region_info info;
+
+		minsz = offsetofend(struct vfio_region_info, offset);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		if (info.index >= cdx_dev->res_count)
+			return -EINVAL;
+
+		/* map offset to the physical address  */
+		info.offset = VFIO_CDX_INDEX_TO_OFFSET(info.index);
+		info.size = vdev->regions[info.index].size;
+		info.flags = vdev->regions[info.index].flags;
+
+		if (copy_to_user((void __user *)arg, &info, minsz))
+			return -EFAULT;
+		return 0;
+	}
+	case VFIO_DEVICE_RESET:
+	{
+		return vfio_cdx_reset_device(vdev);
+	}
+	default:
+		return -ENOTTY;
+	}
+}
+
+static int vfio_cdx_mmap_mmio(struct vfio_cdx_region region,
+			      struct vm_area_struct *vma)
+{
+	u64 size = vma->vm_end - vma->vm_start;
+	u64 pgoff, base;
+
+	pgoff = vma->vm_pgoff &
+		((1U << (VFIO_CDX_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
+	base = pgoff << PAGE_SHIFT;
+
+	if (region.size < PAGE_SIZE || base + size > region.size)
+		return -EINVAL;
+
+	vma->vm_pgoff = (region.addr >> PAGE_SHIFT) + pgoff;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+			       size, vma->vm_page_prot);
+}
+
+static int vfio_cdx_mmap(struct vfio_device *core_vdev,
+			 struct vm_area_struct *vma)
+{
+	struct vfio_cdx_device *vdev =
+		container_of(core_vdev, struct vfio_cdx_device, vdev);
+	struct cdx_device *cdx_dev = vdev->cdx_dev;
+	unsigned int index;
+
+	index = vma->vm_pgoff >> (VFIO_CDX_OFFSET_SHIFT - PAGE_SHIFT);
+
+	if (vma->vm_end < vma->vm_start)
+		return -EINVAL;
+	if (vma->vm_start & ~PAGE_MASK)
+		return -EINVAL;
+	if (vma->vm_end & ~PAGE_MASK)
+		return -EINVAL;
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+	if (index >= cdx_dev->res_count)
+		return -EINVAL;
+
+	if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_MMAP))
+		return -EINVAL;
+
+	if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_READ) &&
+	    (vma->vm_flags & VM_READ))
+		return -EINVAL;
+
+	if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_WRITE) &&
+	    (vma->vm_flags & VM_WRITE))
+		return -EINVAL;
+
+	vma->vm_private_data = cdx_dev;
+
+	return vfio_cdx_mmap_mmio(vdev->regions[index], vma);
+}
+
+static const struct vfio_device_ops vfio_cdx_ops = {
+	.name		= "vfio-cdx",
+	.init		= vfio_cdx_init_device,
+	.release	= vfio_cdx_release_device,
+	.open_device	= vfio_cdx_open_device,
+	.close_device	= vfio_cdx_close_device,
+	.ioctl		= vfio_cdx_ioctl,
+	.mmap		= vfio_cdx_mmap,
+};
+
+static int vfio_cdx_probe(struct cdx_device *cdx_dev)
+{
+	struct vfio_cdx_device *vdev = NULL;
+	struct device *dev = &cdx_dev->dev;
+	int ret;
+
+	vdev = vfio_alloc_device(vfio_cdx_device, vdev, dev,
+				 &vfio_cdx_ops);
+	if (IS_ERR(vdev))
+		return PTR_ERR(vdev);
+
+	ret = vfio_register_group_dev(&vdev->vdev);
+	if (ret) {
+		dev_err(dev, "VFIO_CDX: Failed to add to vfio group\n");
+		goto out_uninit;
+	}
+
+	dev_set_drvdata(dev, vdev);
+	return 0;
+
+out_uninit:
+	vfio_put_device(&vdev->vdev);
+	return ret;
+}
+
+static int vfio_cdx_remove(struct cdx_device *cdx_dev)
+{
+	struct device *dev = &cdx_dev->dev;
+	struct vfio_cdx_device *vdev;
+
+	vdev = dev_get_drvdata(dev);
+	vfio_unregister_group_dev(&vdev->vdev);
+	vfio_put_device(&vdev->vdev);
+
+	return 0;
+}
+
+static const struct cdx_device_id vfio_cdx_table[] = {
+	{ CDX_DRIVER_OVERRIDE_DEVICE_VFIO(CDX_ANY_ID, CDX_ANY_ID) }, /* match all by default */
+	{}
+};
+
+static struct cdx_driver vfio_cdx_driver = {
+	.probe		= vfio_cdx_probe,
+	.remove		= vfio_cdx_remove,
+	.match_id_table	= vfio_cdx_table,
+	.driver	= {
+		.name	= "vfio-cdx",
+		.owner	= THIS_MODULE,
+	},
+	.driver_managed_dma = true,
+};
+
+static int __init vfio_cdx_driver_init(void)
+{
+	return cdx_driver_register(&vfio_cdx_driver);
+}
+
+static void __exit vfio_cdx_driver_exit(void)
+{
+	cdx_driver_unregister(&vfio_cdx_driver);
+}
+
+module_init(vfio_cdx_driver_init);
+module_exit(vfio_cdx_driver_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("VFIO for CDX devices - User Level meta-driver");
diff --git a/drivers/vfio/cdx/vfio_cdx_private.h b/drivers/vfio/cdx/vfio_cdx_private.h
new file mode 100644
index 000000000000..8b6f1ee3f5cd
--- /dev/null
+++ b/drivers/vfio/cdx/vfio_cdx_private.h
@@ -0,0 +1,32 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022-2023, Advanced Micro Devices, Inc.
+ */
+
+#ifndef VFIO_CDX_PRIVATE_H
+#define VFIO_CDX_PRIVATE_H
+
+#define VFIO_CDX_OFFSET_SHIFT    40
+#define VFIO_CDX_OFFSET_MASK (((u64)(1) << VFIO_CDX_OFFSET_SHIFT) - 1)
+
+#define VFIO_CDX_OFFSET_TO_INDEX(off) ((off) >> VFIO_CDX_OFFSET_SHIFT)
+
+#define VFIO_CDX_INDEX_TO_OFFSET(index)	\
+	((u64)(index) << VFIO_CDX_OFFSET_SHIFT)
+
+struct vfio_cdx_region {
+	u32			flags;
+	u32			type;
+	u64			addr;
+	resource_size_t		size;
+	void __iomem		*ioaddr;
+};
+
+struct vfio_cdx_device {
+	struct vfio_device	vdev;
+	struct cdx_device	*cdx_dev;
+	struct device		*dev;
+	struct vfio_cdx_region	*regions;
+};
+
+#endif /* VFIO_CDX_PRIVATE_H */