[v2] mmc: meson-gx: fix SDIO interrupt handling

Message ID 20221122132304.1482508-1-peter.suti@streamunlimited.com
State New
Headers
Series [v2] mmc: meson-gx: fix SDIO interrupt handling |

Commit Message

Peter Suti Nov. 22, 2022, 1:23 p.m. UTC
  With the interrupt support introduced in commit 066ecde sometimes the
Marvell-8987 wifi chip entered a deadlock using the marvell-sd-uapsta-8987
vendor driver. The cause seems to be that sometimes the interrupt handler
handles 2 IRQs and one of them disables the interrupts which are not reenabled
when all interrupts are finished. To work around this, disable all interrupts
when we are in the IRQ context and reenable them when the current IRQ is handled.

Fixes: 066ecde ("mmc: meson-gx: add SDIO interrupt support")

Signed-off-by: Peter Suti <peter.suti@streamunlimited.com>
---
Changes in v2:
	- use spin_lock instead of spin_lock_irqsave
	- only reenable interrupts if they were enabled already

 drivers/mmc/host/meson-gx-mmc.c | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)
  

Comments

Ulf Hansson Nov. 22, 2022, 3:19 p.m. UTC | #1
On Tue, 22 Nov 2022 at 14:23, Peter Suti <peter.suti@streamunlimited.com> wrote:
>
> With the interrupt support introduced in commit 066ecde sometimes the
> Marvell-8987 wifi chip entered a deadlock using the marvell-sd-uapsta-8987
> vendor driver. The cause seems to be that sometimes the interrupt handler
> handles 2 IRQs and one of them disables the interrupts which are not reenabled
> when all interrupts are finished. To work around this, disable all interrupts
> when we are in the IRQ context and reenable them when the current IRQ is handled.
>
> Fixes: 066ecde ("mmc: meson-gx: add SDIO interrupt support")
>
> Signed-off-by: Peter Suti <peter.suti@streamunlimited.com>
> ---
> Changes in v2:
>         - use spin_lock instead of spin_lock_irqsave
>         - only reenable interrupts if they were enabled already
>
>  drivers/mmc/host/meson-gx-mmc.c | 30 +++++++++++++++++++++++-------
>  1 file changed, 23 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
> index 6e5ea0213b47..0c95f8640b34 100644
> --- a/drivers/mmc/host/meson-gx-mmc.c
> +++ b/drivers/mmc/host/meson-gx-mmc.c
> @@ -934,6 +934,13 @@ static void meson_mmc_read_resp(struct mmc_host *mmc, struct mmc_command *cmd)
>         }
>  }
>
> +static bool __meson_mmc_sdio_irq_is_enabled(struct mmc_host *mmc)

Looks like it's better to pass a struct meson_host *host, rather than
a struct mmc_host *mmc.

> +{
> +       struct meson_host *host = mmc_priv(mmc);
> +
> +       return readl(host->regs + SD_EMMC_IRQ_EN) & IRQ_SDIO;
> +}
> +
>  static void __meson_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
>  {
>         struct meson_host *host = mmc_priv(mmc);
> @@ -950,6 +957,11 @@ static irqreturn_t meson_mmc_irq(int irq, void *dev_id)
>         struct mmc_command *cmd;
>         u32 status, raw_status;
>         irqreturn_t ret = IRQ_NONE;
> +       bool irq_enabled;

Nitpick: (since I have a few comments anyway). May I suggest rename
this to sdio_irq_enabled instead?

> +
> +       spin_lock(&host->lock);
> +       irq_enabled = __meson_mmc_sdio_irq_is_enabled(host->mmc);
> +       __meson_mmc_enable_sdio_irq(host->mmc, 0);
>
>         raw_status = readl(host->regs + SD_EMMC_STATUS);
>         status = raw_status & (IRQ_EN_MASK | IRQ_SDIO);
> @@ -958,11 +970,11 @@ static irqreturn_t meson_mmc_irq(int irq, void *dev_id)
>                 dev_dbg(host->dev,
>                         "Unexpected IRQ! irq_en 0x%08lx - status 0x%08x\n",
>                          IRQ_EN_MASK | IRQ_SDIO, raw_status);
> -               return IRQ_NONE;
> +               goto out_unlock;
>         }
>
>         if (WARN_ON(!host))
> -               return IRQ_NONE;
> +               goto out_unlock;

This part looks like it now becomes incorrectly redundant, since we
are now using "host->mmc" a few lines above while calling
__meson_mmc_sdio_irq_is_enabled().

Maybe move the new code below this part instead?

>
>         /* ack all raised interrupts */
>         writel(status, host->regs + SD_EMMC_STATUS);
> @@ -970,17 +982,16 @@ static irqreturn_t meson_mmc_irq(int irq, void *dev_id)
>         cmd = host->cmd;
>
>         if (status & IRQ_SDIO) {
> -               spin_lock(&host->lock);
> -               __meson_mmc_enable_sdio_irq(host->mmc, 0);
>                 sdio_signal_irq(host->mmc);
> -               spin_unlock(&host->lock);
>                 status &= ~IRQ_SDIO;
> -               if (!status)
> +               if (!status) {
> +                       spin_unlock(&host->lock);
>                         return IRQ_HANDLED;
> +               }
>         }
>
>         if (WARN_ON(!cmd))
> -               return IRQ_NONE;
> +               goto out_unlock;
>
>         cmd->error = 0;
>         if (status & IRQ_CRC_ERR) {
> @@ -1023,6 +1034,11 @@ static irqreturn_t meson_mmc_irq(int irq, void *dev_id)
>         if (ret == IRQ_HANDLED)
>                 meson_mmc_request_done(host->mmc, cmd->mrq);
>
> +out_unlock:
> +       if (irq_enabled)
> +               __meson_mmc_enable_sdio_irq(host->mmc, 1);
> +       spin_unlock(&host->lock);
> +
>         return ret;
>  }
>

Kind regards
Uffe
  
Heiner Kallweit Nov. 22, 2022, 9:41 p.m. UTC | #2
On 22.11.2022 14:23, Peter Suti wrote:
> With the interrupt support introduced in commit 066ecde sometimes the
> Marvell-8987 wifi chip entered a deadlock using the marvell-sd-uapsta-8987
> vendor driver. The cause seems to be that sometimes the interrupt handler
> handles 2 IRQs and one of them disables the interrupts which are not reenabled
> when all interrupts are finished. To work around this, disable all interrupts
> when we are in the IRQ context and reenable them when the current IRQ is handled.
> 
To me it's still not clear what you mean with "handles 2 IRQs".
Hard irq handlers aren't re-entrant. Can you provide the exact call sequence
for the issue you're facing?
Are SDIO interrupts handled on more than one CPU in your case?
Are you concerned about the hard irq handler running in parallel on more than one CPU?

The proposed patch looks hacky and somewhat bypasses the SDIO core logic
by partially re-enabling SDIO interrupts in the hard irq handler.

In the first review round I wrote the following but didn't see a feedback yet.
Did you check the linked discussion whether it may be related to your issue?

-> IIRC I had a similar/same problem in mind when discussing the following:
-> https://lore.kernel.org/linux-arm-kernel/CAPDyKFoameOb7d3cn8_ki1O6DbMEAFvkQh1uUsYp4S-Lkq41oQ@mail.gmail.com/
-> Not sure though whether it's related to the issue you're facing.



> Fixes: 066ecde ("mmc: meson-gx: add SDIO interrupt support")
> 
> Signed-off-by: Peter Suti <peter.suti@streamunlimited.com>
> ---
> Changes in v2:
> 	- use spin_lock instead of spin_lock_irqsave
> 	- only reenable interrupts if they were enabled already
> 
>  drivers/mmc/host/meson-gx-mmc.c | 30 +++++++++++++++++++++++-------
>  1 file changed, 23 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
> index 6e5ea0213b47..0c95f8640b34 100644
> --- a/drivers/mmc/host/meson-gx-mmc.c
> +++ b/drivers/mmc/host/meson-gx-mmc.c
> @@ -934,6 +934,13 @@ static void meson_mmc_read_resp(struct mmc_host *mmc, struct mmc_command *cmd)
>  	}
>  }
>  
> +static bool __meson_mmc_sdio_irq_is_enabled(struct mmc_host *mmc)
> +{
> +	struct meson_host *host = mmc_priv(mmc);
> +
> +	return readl(host->regs + SD_EMMC_IRQ_EN) & IRQ_SDIO;
> +}
> +
>  static void __meson_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
>  {
>  	struct meson_host *host = mmc_priv(mmc);
> @@ -950,6 +957,11 @@ static irqreturn_t meson_mmc_irq(int irq, void *dev_id)
>  	struct mmc_command *cmd;
>  	u32 status, raw_status;
>  	irqreturn_t ret = IRQ_NONE;
> +	bool irq_enabled;
> +
> +	spin_lock(&host->lock);
> +	irq_enabled = __meson_mmc_sdio_irq_is_enabled(host->mmc);
> +	__meson_mmc_enable_sdio_irq(host->mmc, 0);
>  
>  	raw_status = readl(host->regs + SD_EMMC_STATUS);
>  	status = raw_status & (IRQ_EN_MASK | IRQ_SDIO);
> @@ -958,11 +970,11 @@ static irqreturn_t meson_mmc_irq(int irq, void *dev_id)
>  		dev_dbg(host->dev,
>  			"Unexpected IRQ! irq_en 0x%08lx - status 0x%08x\n",
>  			 IRQ_EN_MASK | IRQ_SDIO, raw_status);
> -		return IRQ_NONE;
> +		goto out_unlock;
>  	}
>  
>  	if (WARN_ON(!host))
> -		return IRQ_NONE;
> +		goto out_unlock;
>  
>  	/* ack all raised interrupts */
>  	writel(status, host->regs + SD_EMMC_STATUS);
> @@ -970,17 +982,16 @@ static irqreturn_t meson_mmc_irq(int irq, void *dev_id)
>  	cmd = host->cmd;
>  
>  	if (status & IRQ_SDIO) {
> -		spin_lock(&host->lock);
> -		__meson_mmc_enable_sdio_irq(host->mmc, 0);
>  		sdio_signal_irq(host->mmc);
> -		spin_unlock(&host->lock);
>  		status &= ~IRQ_SDIO;
> -		if (!status)
> +		if (!status) {
> +			spin_unlock(&host->lock);
>  			return IRQ_HANDLED;
> +		}
>  	}
>  
>  	if (WARN_ON(!cmd))
> -		return IRQ_NONE;
> +		goto out_unlock;
>  
>  	cmd->error = 0;
>  	if (status & IRQ_CRC_ERR) {
> @@ -1023,6 +1034,11 @@ static irqreturn_t meson_mmc_irq(int irq, void *dev_id)
>  	if (ret == IRQ_HANDLED)
>  		meson_mmc_request_done(host->mmc, cmd->mrq);
>  
> +out_unlock:
> +	if (irq_enabled)
> +		__meson_mmc_enable_sdio_irq(host->mmc, 1);
> +	spin_unlock(&host->lock);
> +
>  	return ret;
>  }
>
  

Patch

diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
index 6e5ea0213b47..0c95f8640b34 100644
--- a/drivers/mmc/host/meson-gx-mmc.c
+++ b/drivers/mmc/host/meson-gx-mmc.c
@@ -934,6 +934,13 @@  static void meson_mmc_read_resp(struct mmc_host *mmc, struct mmc_command *cmd)
 	}
 }
 
+static bool __meson_mmc_sdio_irq_is_enabled(struct mmc_host *mmc)
+{
+	struct meson_host *host = mmc_priv(mmc);
+
+	return readl(host->regs + SD_EMMC_IRQ_EN) & IRQ_SDIO;
+}
+
 static void __meson_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
 {
 	struct meson_host *host = mmc_priv(mmc);
@@ -950,6 +957,11 @@  static irqreturn_t meson_mmc_irq(int irq, void *dev_id)
 	struct mmc_command *cmd;
 	u32 status, raw_status;
 	irqreturn_t ret = IRQ_NONE;
+	bool irq_enabled;
+
+	spin_lock(&host->lock);
+	irq_enabled = __meson_mmc_sdio_irq_is_enabled(host->mmc);
+	__meson_mmc_enable_sdio_irq(host->mmc, 0);
 
 	raw_status = readl(host->regs + SD_EMMC_STATUS);
 	status = raw_status & (IRQ_EN_MASK | IRQ_SDIO);
@@ -958,11 +970,11 @@  static irqreturn_t meson_mmc_irq(int irq, void *dev_id)
 		dev_dbg(host->dev,
 			"Unexpected IRQ! irq_en 0x%08lx - status 0x%08x\n",
 			 IRQ_EN_MASK | IRQ_SDIO, raw_status);
-		return IRQ_NONE;
+		goto out_unlock;
 	}
 
 	if (WARN_ON(!host))
-		return IRQ_NONE;
+		goto out_unlock;
 
 	/* ack all raised interrupts */
 	writel(status, host->regs + SD_EMMC_STATUS);
@@ -970,17 +982,16 @@  static irqreturn_t meson_mmc_irq(int irq, void *dev_id)
 	cmd = host->cmd;
 
 	if (status & IRQ_SDIO) {
-		spin_lock(&host->lock);
-		__meson_mmc_enable_sdio_irq(host->mmc, 0);
 		sdio_signal_irq(host->mmc);
-		spin_unlock(&host->lock);
 		status &= ~IRQ_SDIO;
-		if (!status)
+		if (!status) {
+			spin_unlock(&host->lock);
 			return IRQ_HANDLED;
+		}
 	}
 
 	if (WARN_ON(!cmd))
-		return IRQ_NONE;
+		goto out_unlock;
 
 	cmd->error = 0;
 	if (status & IRQ_CRC_ERR) {
@@ -1023,6 +1034,11 @@  static irqreturn_t meson_mmc_irq(int irq, void *dev_id)
 	if (ret == IRQ_HANDLED)
 		meson_mmc_request_done(host->mmc, cmd->mrq);
 
+out_unlock:
+	if (irq_enabled)
+		__meson_mmc_enable_sdio_irq(host->mmc, 1);
+	spin_unlock(&host->lock);
+
 	return ret;
 }