[v7,3/6] iommu/s390: Fix potential s390_domain aperture shrinking

Message ID 20221017124558.1386337-4-schnelle@linux.ibm.com
State New
Headers
Series iommu/s390: Fixes related to attach and aperture handling |

Commit Message

Niklas Schnelle Oct. 17, 2022, 12:45 p.m. UTC
  The s390 IOMMU driver currently sets the IOMMU domain's aperture to
match the device specific DMA address range of the device that is first
attached. This is not ideal. For one if the domain has no device
attached in the meantime the aperture could be shrunk allowing
translations outside the aperture to exist in the translation tables.
Also this is a bit of a misuse of the aperture which really should
describe what addresses can be translated and not some device specific
limitations.

Instead of misusing the aperture like this we can instead create
reserved ranges for the ranges inaccessible to the attached devices
allowing devices with overlapping ranges to still share an IOMMU domain.
This also significantly simplifies s390_iommu_attach_device() allowing
us to move the aperture check to the beginning of the function and
removing the need to hold the device list's lock to check the aperture.

As we then use the same aperture for all domains and it only depends on
the table properties we can already check zdev->start_dma/end_dma at
probe time and turn the check on attach into a WARN_ON().

Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
---
v5->v6:
- Return -EINVAL after WARN_ON() in attach
v4->v5:
- Make aperture check in attach a WARN_ON() and fail in probe if
  zdev->start_dma/end_dma doesn't git in aperture  (Jason)

 drivers/iommu/s390-iommu.c | 63 ++++++++++++++++++++++++++------------
 1 file changed, 43 insertions(+), 20 deletions(-)
  

Comments

Niklas Schnelle Oct. 24, 2022, 3:02 p.m. UTC | #1
On Mon, 2022-10-17 at 14:45 +0200, Niklas Schnelle wrote:
> The s390 IOMMU driver currently sets the IOMMU domain's aperture to
> match the device specific DMA address range of the device that is first
> attached. This is not ideal. For one if the domain has no device
> attached in the meantime the aperture could be shrunk allowing
> translations outside the aperture to exist in the translation tables.
> Also this is a bit of a misuse of the aperture which really should
> describe what addresses can be translated and not some device specific
> limitations.
> 
> Instead of misusing the aperture like this we can instead create
> reserved ranges for the ranges inaccessible to the attached devices
> allowing devices with overlapping ranges to still share an IOMMU domain.
> This also significantly simplifies s390_iommu_attach_device() allowing
> us to move the aperture check to the beginning of the function and
> removing the need to hold the device list's lock to check the aperture.
> 
> As we then use the same aperture for all domains and it only depends on
> the table properties we can already check zdev->start_dma/end_dma at
> probe time and turn the check on attach into a WARN_ON().
> 
> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
> Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
> ---
> v5->v6:
> - Return -EINVAL after WARN_ON() in attach
> v4->v5:
> - Make aperture check in attach a WARN_ON() and fail in probe if
>   zdev->start_dma/end_dma doesn't git in aperture  (Jason)
> 
>  drivers/iommu/s390-iommu.c | 63 ++++++++++++++++++++++++++------------
>  1 file changed, 43 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
> index af83ccde16a4..c4203a37faa4 100644
> --- a/drivers/iommu/s390-iommu.c
> +++ b/drivers/iommu/s390-iommu.c
> @@ -62,6 +62,9 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
>  		kfree(s390_domain);
>  		return NULL;
>  	}
> +	s390_domain->domain.geometry.force_aperture = true;
> +	s390_domain->domain.geometry.aperture_start = 0;
> +	s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1;
>  
>  	spin_lock_init(&s390_domain->dma_table_lock);
>  	spin_lock_init(&s390_domain->list_lock);
> @@ -102,11 +105,15 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
>  	struct s390_domain *s390_domain = to_s390_domain(domain);
>  	struct zpci_dev *zdev = to_zpci_dev(dev);
>  	unsigned long flags;
> -	int cc, rc = 0;
> +	int cc;
>  
>  	if (!zdev)
>  		return -ENODEV;
>  
> +	if (WARN_ON(domain->geometry.aperture_start > zdev->end_dma ||
> +		domain->geometry.aperture_end < zdev->start_dma))
> +		return -EINVAL;
> +
>  	if (zdev->s390_domain)
>  		__s390_iommu_detach_device(zdev);
>  	else if (zdev->dma_table)
> @@ -118,30 +125,14 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
>  		return -EIO;
>  	zdev->dma_table = s390_domain->dma_table;
>  
> -	spin_lock_irqsave(&s390_domain->list_lock, flags);
> -	/* First device defines the DMA range limits */
> -	if (list_empty(&s390_domain->devices)) {
> -		domain->geometry.aperture_start = zdev->start_dma;
> -		domain->geometry.aperture_end = zdev->end_dma;
> -		domain->geometry.force_aperture = true;
> -	/* Allow only devices with identical DMA range limits */
> -	} else if (domain->geometry.aperture_start != zdev->start_dma ||
> -		   domain->geometry.aperture_end != zdev->end_dma) {
> -		spin_unlock_irqrestore(&s390_domain->list_lock, flags);
> -		rc = -EINVAL;
> -		goto out_unregister;
> -	}
> +	zdev->dma_table = s390_domain->dma_table;
>  	zdev->s390_domain = s390_domain;
> +
> +	spin_lock_irqsave(&s390_domain->list_lock, flags);
>  	list_add(&zdev->iommu_list, &s390_domain->devices);
>  	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
>  
>  	return 0;
> -
> -out_unregister:
> -	zpci_unregister_ioat(zdev, 0);
> -	zdev->dma_table = NULL;
> -
> -	return rc;
>  }
>  
>  static void s390_iommu_detach_device(struct iommu_domain *domain,
> @@ -155,6 +146,30 @@ static void s390_iommu_detach_device(struct iommu_domain *domain,
>  	zpci_dma_init_device(zdev);
>  }
>  
> +static void s390_iommu_get_resv_regions(struct device *dev,
> +					struct list_head *list)
> +{
> +	struct zpci_dev *zdev = to_zpci_dev(dev);
> +	struct iommu_resv_region *region;
> +
> +	if (zdev->start_dma) {
> +		region = iommu_alloc_resv_region(0, zdev->start_dma, 0,
> +						 IOMMU_RESV_RESERVED);

Heads up! The iommu_alloc_resv_region() function gained a gfp parameter
from v6.1-rc1 to v6.1-rc2 so the above needs "…, GFP_KERNEL);".
@Joerg, @Will if you don't mind I'll rebase on v6.1-rc2 and resend.
@Jason if you want to re-add your R-b this would be good time.


> +		if (!region)
> +			return;
> +		list_add_tail(&region->list, list);
> +	}
> +
> +	if (zdev->end_dma < ZPCI_TABLE_SIZE_RT - 1) {
> +		region = iommu_alloc_resv_region(zdev->end_dma + 1,
> +						 ZPCI_TABLE_SIZE_RT - zdev->end_dma - 1,
> +						 0, IOMMU_RESV_RESERVED);

Same as above.

> +		if (!region)
> +			return;
> +		list_add_tail(&region->list, list);
> +	}
> +}
> +
>  static struct iommu_device *s390_iommu_probe_device(struct device *dev)
>  {
>  	struct zpci_dev *zdev;
> @@ -164,6 +179,13 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev)
>  
>  	zdev = to_zpci_dev(dev);
>  
> +	if (zdev->start_dma > zdev->end_dma ||
> +	    zdev->start_dma > ZPCI_TABLE_SIZE_RT - 1)
> +		return ERR_PTR(-EINVAL);
> +
> +	if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1)
> +		zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1;
> +
>  	return &zdev->iommu_dev;
>  }
>  
> @@ -342,6 +364,7 @@ static const struct iommu_ops s390_iommu_ops = {
>  	.release_device = s390_iommu_release_device,
>  	.device_group = generic_device_group,
>  	.pgsize_bitmap = S390_IOMMU_PGSIZES,
> +	.get_resv_regions = s390_iommu_get_resv_regions,
>  	.default_domain_ops = &(const struct iommu_domain_ops) {
>  		.attach_dev	= s390_iommu_attach_device,
>  		.detach_dev	= s390_iommu_detach_device,
  
Jason Gunthorpe Oct. 24, 2022, 4:53 p.m. UTC | #2
On Mon, Oct 17, 2022 at 02:45:55PM +0200, Niklas Schnelle wrote:
> The s390 IOMMU driver currently sets the IOMMU domain's aperture to
> match the device specific DMA address range of the device that is first
> attached. This is not ideal. For one if the domain has no device
> attached in the meantime the aperture could be shrunk allowing
> translations outside the aperture to exist in the translation tables.
> Also this is a bit of a misuse of the aperture which really should
> describe what addresses can be translated and not some device specific
> limitations.
> 
> Instead of misusing the aperture like this we can instead create
> reserved ranges for the ranges inaccessible to the attached devices
> allowing devices with overlapping ranges to still share an IOMMU domain.
> This also significantly simplifies s390_iommu_attach_device() allowing
> us to move the aperture check to the beginning of the function and
> removing the need to hold the device list's lock to check the aperture.
> 
> As we then use the same aperture for all domains and it only depends on
> the table properties we can already check zdev->start_dma/end_dma at
> probe time and turn the check on attach into a WARN_ON().
> 
> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
> Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
> ---
> v5->v6:
> - Return -EINVAL after WARN_ON() in attach
> v4->v5:
> - Make aperture check in attach a WARN_ON() and fail in probe if
>   zdev->start_dma/end_dma doesn't git in aperture  (Jason)

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>

Jason
  

Patch

diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index af83ccde16a4..c4203a37faa4 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -62,6 +62,9 @@  static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
 		kfree(s390_domain);
 		return NULL;
 	}
+	s390_domain->domain.geometry.force_aperture = true;
+	s390_domain->domain.geometry.aperture_start = 0;
+	s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1;
 
 	spin_lock_init(&s390_domain->dma_table_lock);
 	spin_lock_init(&s390_domain->list_lock);
@@ -102,11 +105,15 @@  static int s390_iommu_attach_device(struct iommu_domain *domain,
 	struct s390_domain *s390_domain = to_s390_domain(domain);
 	struct zpci_dev *zdev = to_zpci_dev(dev);
 	unsigned long flags;
-	int cc, rc = 0;
+	int cc;
 
 	if (!zdev)
 		return -ENODEV;
 
+	if (WARN_ON(domain->geometry.aperture_start > zdev->end_dma ||
+		domain->geometry.aperture_end < zdev->start_dma))
+		return -EINVAL;
+
 	if (zdev->s390_domain)
 		__s390_iommu_detach_device(zdev);
 	else if (zdev->dma_table)
@@ -118,30 +125,14 @@  static int s390_iommu_attach_device(struct iommu_domain *domain,
 		return -EIO;
 	zdev->dma_table = s390_domain->dma_table;
 
-	spin_lock_irqsave(&s390_domain->list_lock, flags);
-	/* First device defines the DMA range limits */
-	if (list_empty(&s390_domain->devices)) {
-		domain->geometry.aperture_start = zdev->start_dma;
-		domain->geometry.aperture_end = zdev->end_dma;
-		domain->geometry.force_aperture = true;
-	/* Allow only devices with identical DMA range limits */
-	} else if (domain->geometry.aperture_start != zdev->start_dma ||
-		   domain->geometry.aperture_end != zdev->end_dma) {
-		spin_unlock_irqrestore(&s390_domain->list_lock, flags);
-		rc = -EINVAL;
-		goto out_unregister;
-	}
+	zdev->dma_table = s390_domain->dma_table;
 	zdev->s390_domain = s390_domain;
+
+	spin_lock_irqsave(&s390_domain->list_lock, flags);
 	list_add(&zdev->iommu_list, &s390_domain->devices);
 	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
 
 	return 0;
-
-out_unregister:
-	zpci_unregister_ioat(zdev, 0);
-	zdev->dma_table = NULL;
-
-	return rc;
 }
 
 static void s390_iommu_detach_device(struct iommu_domain *domain,
@@ -155,6 +146,30 @@  static void s390_iommu_detach_device(struct iommu_domain *domain,
 	zpci_dma_init_device(zdev);
 }
 
+static void s390_iommu_get_resv_regions(struct device *dev,
+					struct list_head *list)
+{
+	struct zpci_dev *zdev = to_zpci_dev(dev);
+	struct iommu_resv_region *region;
+
+	if (zdev->start_dma) {
+		region = iommu_alloc_resv_region(0, zdev->start_dma, 0,
+						 IOMMU_RESV_RESERVED);
+		if (!region)
+			return;
+		list_add_tail(&region->list, list);
+	}
+
+	if (zdev->end_dma < ZPCI_TABLE_SIZE_RT - 1) {
+		region = iommu_alloc_resv_region(zdev->end_dma + 1,
+						 ZPCI_TABLE_SIZE_RT - zdev->end_dma - 1,
+						 0, IOMMU_RESV_RESERVED);
+		if (!region)
+			return;
+		list_add_tail(&region->list, list);
+	}
+}
+
 static struct iommu_device *s390_iommu_probe_device(struct device *dev)
 {
 	struct zpci_dev *zdev;
@@ -164,6 +179,13 @@  static struct iommu_device *s390_iommu_probe_device(struct device *dev)
 
 	zdev = to_zpci_dev(dev);
 
+	if (zdev->start_dma > zdev->end_dma ||
+	    zdev->start_dma > ZPCI_TABLE_SIZE_RT - 1)
+		return ERR_PTR(-EINVAL);
+
+	if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1)
+		zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1;
+
 	return &zdev->iommu_dev;
 }
 
@@ -342,6 +364,7 @@  static const struct iommu_ops s390_iommu_ops = {
 	.release_device = s390_iommu_release_device,
 	.device_group = generic_device_group,
 	.pgsize_bitmap = S390_IOMMU_PGSIZES,
+	.get_resv_regions = s390_iommu_get_resv_regions,
 	.default_domain_ops = &(const struct iommu_domain_ops) {
 		.attach_dev	= s390_iommu_attach_device,
 		.detach_dev	= s390_iommu_detach_device,