[1/5] iommu/s390: Make attach succeed even if the device is in error state
Commit Message
If a zPCI device is in the error state while switching IOMMU domains,
zpci_register_ioat() will fail and we would end up with the device not
attached to any domain. In this state, since zdev->dma_table == NULL,
a reset via zpci_hot_reset_device() would wrongfully re-initialize the
device for DMA API usage using zpci_dma_init_device(). As automatic
recovery is currently disabled while attached to an IOMMU domain, this
only affects slot resets triggered through other means, but it will
affect automatic recovery once we switch to using dma-iommu.
Additionally, with that switch, common code expects attaching to the
default domain to always work, so zpci_register_ioat() should only fail
if there is no chance to recover anyway, e.g. if the device has been
unplugged.
Improve the robustness of attach by specifically looking at the status
returned by zpci_mod_fc() to determine if the device is unavailable and,
in this case, simply ignore the error. Once the device is reset,
zpci_hot_reset_device() will then correctly set the domain's DMA
translation tables.
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
---
arch/s390/include/asm/pci.h | 2 +-
arch/s390/kvm/pci.c | 6 ++++--
arch/s390/pci/pci.c | 11 ++++++-----
arch/s390/pci/pci_dma.c | 3 ++-
drivers/iommu/s390-iommu.c | 9 +++++++--
5 files changed, 20 insertions(+), 11 deletions(-)
Comments
On 10/18/22 10:51 AM, Niklas Schnelle wrote:
> If a zPCI device is in the error state while switching IOMMU domains
> zpci_register_ioat() will fail and we would end up with the device not
> attached to any domain. In this state since zdev->dma_table == NULL
> a reset via zpci_hot_reset_device() would wrongfully re-initialize the
> device for DMA API usage using zpci_dma_init_device(). As automatic
> recovery is currently disabled while attached to an IOMMU domain this
> only affects slot resets triggered through other means but will affect
> automatic recovery once we switch to using dma-iommu.
>
> Additionally with that switch common code expects attaching to the
> default domain to always work so zpci_register_ioat() should only fail
> if there is no chance to recover anyway, e.g. if the device has been
> unplugged.
>
> Improve the robustness of attach by specifically looking at the status
> returned by zpci_mod_fc() to determine if the device is unavailable and
> in this case simply ignore the error. Once the device is reset
> zpci_hot_reset_device() will then correctly set the domain's DMA
> translation tables.
>
> Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
Seems reasonable to me.
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
> ---
> arch/s390/include/asm/pci.h | 2 +-
> arch/s390/kvm/pci.c | 6 ++++--
> arch/s390/pci/pci.c | 11 ++++++-----
> arch/s390/pci/pci_dma.c | 3 ++-
> drivers/iommu/s390-iommu.c | 9 +++++++--
> 5 files changed, 20 insertions(+), 11 deletions(-)
>
> diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
> index 15f8714ca9b7..07361e2fd8c5 100644
> --- a/arch/s390/include/asm/pci.h
> +++ b/arch/s390/include/asm/pci.h
> @@ -221,7 +221,7 @@ void zpci_device_reserved(struct zpci_dev *zdev);
> bool zpci_is_device_configured(struct zpci_dev *zdev);
>
> int zpci_hot_reset_device(struct zpci_dev *zdev);
> -int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
> +int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64, u8 *);
> int zpci_unregister_ioat(struct zpci_dev *, u8);
> void zpci_remove_reserved_devices(void);
> void zpci_update_fh(struct zpci_dev *zdev, u32 fh);
> diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
> index c50c1645c0ae..03964c0e1fdf 100644
> --- a/arch/s390/kvm/pci.c
> +++ b/arch/s390/kvm/pci.c
> @@ -434,6 +434,7 @@ static void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
> static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
> {
> struct zpci_dev *zdev = opaque;
> + u8 status;
> int rc;
>
> if (!zdev)
> @@ -486,7 +487,7 @@ static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
>
> /* Re-register the IOMMU that was already created */
> rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
> - virt_to_phys(zdev->dma_table));
> + virt_to_phys(zdev->dma_table), &status);
> if (rc)
> goto clear_gisa;
>
> @@ -516,6 +517,7 @@ static void kvm_s390_pci_unregister_kvm(void *opaque)
> {
> struct zpci_dev *zdev = opaque;
> struct kvm *kvm;
> + u8 status;
>
> if (!zdev)
> return;
> @@ -554,7 +556,7 @@ static void kvm_s390_pci_unregister_kvm(void *opaque)
>
> /* Re-register the IOMMU that was already created */
> zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
> - virt_to_phys(zdev->dma_table));
> + virt_to_phys(zdev->dma_table), &status);
>
> out:
> spin_lock(&kvm->arch.kzdev_list_lock);
> diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
> index 73cdc5539384..a703dcd94a68 100644
> --- a/arch/s390/pci/pci.c
> +++ b/arch/s390/pci/pci.c
> @@ -116,20 +116,20 @@ EXPORT_SYMBOL_GPL(pci_proc_domain);
>
> /* Modify PCI: Register I/O address translation parameters */
> int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
> - u64 base, u64 limit, u64 iota)
> + u64 base, u64 limit, u64 iota, u8 *status)
> {
> u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_REG_IOAT);
> struct zpci_fib fib = {0};
> - u8 cc, status;
> + u8 cc;
>
> WARN_ON_ONCE(iota & 0x3fff);
> fib.pba = base;
> fib.pal = limit;
> fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
> fib.gd = zdev->gisa;
> - cc = zpci_mod_fc(req, &fib, &status);
> + cc = zpci_mod_fc(req, &fib, status);
> if (cc)
> - zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status);
> + zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, *status);
> return cc;
> }
> EXPORT_SYMBOL_GPL(zpci_register_ioat);
> @@ -764,6 +764,7 @@ EXPORT_SYMBOL_GPL(zpci_disable_device);
> */
> int zpci_hot_reset_device(struct zpci_dev *zdev)
> {
> + u8 status;
> int rc;
>
> zpci_dbg(3, "rst fid:%x, fh:%x\n", zdev->fid, zdev->fh);
> @@ -787,7 +788,7 @@ int zpci_hot_reset_device(struct zpci_dev *zdev)
>
> if (zdev->dma_table)
> rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
> - virt_to_phys(zdev->dma_table));
> + virt_to_phys(zdev->dma_table), &status);
> else
> rc = zpci_dma_init_device(zdev);
> if (rc) {
> diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
> index 227cf0a62800..dee825ee7305 100644
> --- a/arch/s390/pci/pci_dma.c
> +++ b/arch/s390/pci/pci_dma.c
> @@ -547,6 +547,7 @@ static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
>
> int zpci_dma_init_device(struct zpci_dev *zdev)
> {
> + u8 status;
> int rc;
>
> /*
> @@ -598,7 +599,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
>
> }
> if (zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
> - virt_to_phys(zdev->dma_table))) {
> + virt_to_phys(zdev->dma_table), &status)) {
> rc = -EIO;
> goto free_bitmap;
> }
> diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
> index 6c407b61b25a..ee88e717254b 100644
> --- a/drivers/iommu/s390-iommu.c
> +++ b/drivers/iommu/s390-iommu.c
> @@ -98,6 +98,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
> struct s390_domain *s390_domain = to_s390_domain(domain);
> struct zpci_dev *zdev = to_zpci_dev(dev);
> unsigned long flags;
> + u8 status;
> int cc;
>
> if (!zdev)
> @@ -113,8 +114,12 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
> zpci_dma_exit_device(zdev);
>
> cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
> - virt_to_phys(s390_domain->dma_table));
> - if (cc)
> + virt_to_phys(s390_domain->dma_table), &status);
> + /*
> + * If the device is undergoing error recovery the reset code
> + * will re-establish the new domain.
> + */
> + if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
> return -EIO;
> zdev->dma_table = s390_domain->dma_table;
>
@@ -221,7 +221,7 @@ void zpci_device_reserved(struct zpci_dev *zdev);
bool zpci_is_device_configured(struct zpci_dev *zdev);
int zpci_hot_reset_device(struct zpci_dev *zdev);
-int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
+int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64, u8 *);
int zpci_unregister_ioat(struct zpci_dev *, u8);
void zpci_remove_reserved_devices(void);
void zpci_update_fh(struct zpci_dev *zdev, u32 fh);
@@ -434,6 +434,7 @@ static void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
{
struct zpci_dev *zdev = opaque;
+ u8 status;
int rc;
if (!zdev)
@@ -486,7 +487,7 @@ static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
/* Re-register the IOMMU that was already created */
rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table));
+ virt_to_phys(zdev->dma_table), &status);
if (rc)
goto clear_gisa;
@@ -516,6 +517,7 @@ static void kvm_s390_pci_unregister_kvm(void *opaque)
{
struct zpci_dev *zdev = opaque;
struct kvm *kvm;
+ u8 status;
if (!zdev)
return;
@@ -554,7 +556,7 @@ static void kvm_s390_pci_unregister_kvm(void *opaque)
/* Re-register the IOMMU that was already created */
zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table));
+ virt_to_phys(zdev->dma_table), &status);
out:
spin_lock(&kvm->arch.kzdev_list_lock);
@@ -116,20 +116,20 @@ EXPORT_SYMBOL_GPL(pci_proc_domain);
/* Modify PCI: Register I/O address translation parameters */
int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
- u64 base, u64 limit, u64 iota)
+ u64 base, u64 limit, u64 iota, u8 *status)
{
u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_REG_IOAT);
struct zpci_fib fib = {0};
- u8 cc, status;
+ u8 cc;
WARN_ON_ONCE(iota & 0x3fff);
fib.pba = base;
fib.pal = limit;
fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
fib.gd = zdev->gisa;
- cc = zpci_mod_fc(req, &fib, &status);
+ cc = zpci_mod_fc(req, &fib, status);
if (cc)
- zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status);
+ zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, *status);
return cc;
}
EXPORT_SYMBOL_GPL(zpci_register_ioat);
@@ -764,6 +764,7 @@ EXPORT_SYMBOL_GPL(zpci_disable_device);
*/
int zpci_hot_reset_device(struct zpci_dev *zdev)
{
+ u8 status;
int rc;
zpci_dbg(3, "rst fid:%x, fh:%x\n", zdev->fid, zdev->fh);
@@ -787,7 +788,7 @@ int zpci_hot_reset_device(struct zpci_dev *zdev)
if (zdev->dma_table)
rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table));
+ virt_to_phys(zdev->dma_table), &status);
else
rc = zpci_dma_init_device(zdev);
if (rc) {
@@ -547,6 +547,7 @@ static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
int zpci_dma_init_device(struct zpci_dev *zdev)
{
+ u8 status;
int rc;
/*
@@ -598,7 +599,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
}
if (zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table))) {
+ virt_to_phys(zdev->dma_table), &status)) {
rc = -EIO;
goto free_bitmap;
}
@@ -98,6 +98,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
struct s390_domain *s390_domain = to_s390_domain(domain);
struct zpci_dev *zdev = to_zpci_dev(dev);
unsigned long flags;
+ u8 status;
int cc;
if (!zdev)
@@ -113,8 +114,12 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
zpci_dma_exit_device(zdev);
cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(s390_domain->dma_table));
- if (cc)
+ virt_to_phys(s390_domain->dma_table), &status);
+ /*
+ * If the device is undergoing error recovery the reset code
+ * will re-establish the new domain.
+ */
+ if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
return -EIO;
zdev->dma_table = s390_domain->dma_table;