[1/5] mmc: sdhci-of-arasan: Fix SDHCI_RESET_ALL for CQHCI

Message ID 20221017205610.1.I29f6a2189e84e35ad89c1833793dca9e36c64297@changeid
State New
Headers
Series mmc: sdhci controllers: Fix SDHCI_RESET_ALL for CQHCI

Commit Message

Brian Norris Oct. 18, 2022, 3:57 a.m. UTC
SDHCI_RESET_ALL resets also reset the hardware CQE state, but we aren't
tracking that properly in software. When the two get out of sync, we may
trigger various timeouts.

It's not typical to perform resets while CQE is enabled, but there is
one particular case I hit commonly enough: mmc_suspend() ->
mmc_power_off(). Typically we will eventually deactivate CQE
(cqhci_suspend() -> cqhci_deactivate()), but that's not guaranteed -- in
particular, not if we perform a partial (e.g., interrupted) system
suspend.

The same bug was already found and fixed for two other drivers, in v5.7
and v5.9:

5cf583f1fb9c mmc: sdhci-msm: Deactivate CQE during SDHC reset
df57d73276b8 mmc: sdhci-pci: Fix SDHCI_RESET_ALL for CQHCI for Intel GLK-based controllers

The latter is especially prescient, saying "other drivers using CQHCI
might benefit from a similar change, if they also have CQHCI reset by
SDHCI_RESET_ALL."

So like these other patches, deactivate CQHCI when resetting the
controller. Also, move the DT/caps handling (MMC_CAP2_CQE and
MMC_CAP2_CQE_DCMD) from probe into sdhci_arasan_add_host(), because
sdhci_setup_host() performs resets before we've initialized CQHCI. This
is the pattern followed in other SDHCI/CQHCI drivers.

Fixes: 84362d79f436 ("mmc: sdhci-of-arasan: Add CQHCI support for arasan,sdhci-5.1")
Cc: <stable@vger.kernel.org>
Signed-off-by: Brian Norris <briannorris@chromium.org>
---

 drivers/mmc/host/sdhci-of-arasan.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)
  

Comments

Guenter Roeck Oct. 18, 2022, 2:26 p.m. UTC | #1
On Mon, Oct 17, 2022 at 08:57:20PM -0700, Brian Norris wrote:
> SDHCI_RESET_ALL resets will reset the hardware CQE state, but we aren't
> tracking that properly in software. When out of sync, we may trigger
> various timeouts.
> 
> It's not typical to perform resets while CQE is enabled, but one
> particular case I hit commonly enough: mmc_suspend() -> mmc_power_off().
> Typically we will eventually deactivate CQE (cqhci_suspend() ->
> cqhci_deactivate()), but that's not guaranteed -- in particular, if
> we perform a partial (e.g., interrupted) system suspend.
> 
> The same bug was already found and fixed for two other drivers, in v5.7
> and v5.9:
> 
> 5cf583f1fb9c mmc: sdhci-msm: Deactivate CQE during SDHC reset
> df57d73276b8 mmc: sdhci-pci: Fix SDHCI_RESET_ALL for CQHCI for Intel GLK-based controllers
> 
> The latter is especially prescient, saying "other drivers using CQHCI
> might benefit from a similar change, if they also have CQHCI reset by
> SDHCI_RESET_ALL."
> 
> So like these other patches, deactivate CQHCI when resetting the
> controller. Also, move around the DT/caps handling, because
> sdhci_setup_host() performs resets before we've initialized CQHCI. This
> is the pattern followed in other SDHCI/CQHCI drivers.
> 
> Fixes: 84362d79f436 ("mmc: sdhci-of-arasan: Add CQHCI support for arasan,sdhci-5.1")
> Cc: <stable@vger.kernel.org>
> Signed-off-by: Brian Norris <briannorris@chromium.org>

Reviewed-by: Guenter Roeck <linux@roeck-us.net>

> ---
> 
>  drivers/mmc/host/sdhci-of-arasan.c | 17 +++++++++++------
>  1 file changed, 11 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c
> index 3997cad1f793..1988a703781a 100644
> --- a/drivers/mmc/host/sdhci-of-arasan.c
> +++ b/drivers/mmc/host/sdhci-of-arasan.c
> @@ -366,6 +366,10 @@ static void sdhci_arasan_reset(struct sdhci_host *host, u8 mask)
>  	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
>  	struct sdhci_arasan_data *sdhci_arasan = sdhci_pltfm_priv(pltfm_host);
>  
> +	if ((host->mmc->caps2 & MMC_CAP2_CQE) && (mask & SDHCI_RESET_ALL) &&
> +	    sdhci_arasan->has_cqe)
> +		cqhci_deactivate(host->mmc);
> +
>  	sdhci_reset(host, mask);
>  
>  	if (sdhci_arasan->quirks & SDHCI_ARASAN_QUIRK_FORCE_CDTEST) {
> @@ -1521,7 +1525,8 @@ static int sdhci_arasan_register_sdclk(struct sdhci_arasan_data *sdhci_arasan,
>  	return 0;
>  }
>  
> -static int sdhci_arasan_add_host(struct sdhci_arasan_data *sdhci_arasan)
> +static int sdhci_arasan_add_host(struct sdhci_arasan_data *sdhci_arasan,
> +				 struct device_node *np)
>  {
>  	struct sdhci_host *host = sdhci_arasan->host;
>  	struct cqhci_host *cq_host;
> @@ -1549,6 +1554,10 @@ static int sdhci_arasan_add_host(struct sdhci_arasan_data *sdhci_arasan)
>  	if (dma64)
>  		cq_host->caps |= CQHCI_TASK_DESC_SZ_128;
>  
> +	host->mmc->caps2 |= MMC_CAP2_CQE;
> +	if (!of_property_read_bool(np, "disable-cqe-dcmd"))
> +		host->mmc->caps2 |= MMC_CAP2_CQE_DCMD;
> +
>  	ret = cqhci_init(cq_host, host->mmc, dma64);
>  	if (ret)
>  		goto cleanup;
> @@ -1705,13 +1714,9 @@ static int sdhci_arasan_probe(struct platform_device *pdev)
>  		host->mmc_host_ops.start_signal_voltage_switch =
>  					sdhci_arasan_voltage_switch;
>  		sdhci_arasan->has_cqe = true;
> -		host->mmc->caps2 |= MMC_CAP2_CQE;
> -
> -		if (!of_property_read_bool(np, "disable-cqe-dcmd"))
> -			host->mmc->caps2 |= MMC_CAP2_CQE_DCMD;
>  	}
>  
> -	ret = sdhci_arasan_add_host(sdhci_arasan);
> +	ret = sdhci_arasan_add_host(sdhci_arasan, np);
>  	if (ret)
>  		goto err_add_host;
>  
> -- 
> 2.38.0.413.g74048e4d9e-goog
>
  
Adrian Hunter Oct. 18, 2022, 4:13 p.m. UTC | #2
On 18/10/22 06:57, Brian Norris wrote:
> SDHCI_RESET_ALL resets will reset the hardware CQE state, but we aren't
> tracking that properly in software. When out of sync, we may trigger
> various timeouts.
> 
> It's not typical to perform resets while CQE is enabled, but one
> particular case I hit commonly enough: mmc_suspend() -> mmc_power_off().
> Typically we will eventually deactivate CQE (cqhci_suspend() ->
> cqhci_deactivate()), but that's not guaranteed -- in particular, if
> we perform a partial (e.g., interrupted) system suspend.
> 
> The same bug was already found and fixed for two other drivers, in v5.7
> and v5.9:
> 
> 5cf583f1fb9c mmc: sdhci-msm: Deactivate CQE during SDHC reset
> df57d73276b8 mmc: sdhci-pci: Fix SDHCI_RESET_ALL for CQHCI for Intel GLK-based controllers
> 
> The latter is especially prescient, saying "other drivers using CQHCI
> might benefit from a similar change, if they also have CQHCI reset by
> SDHCI_RESET_ALL."
> 
> So like these other patches, deactivate CQHCI when resetting the
> controller. Also, move around the DT/caps handling, because
> sdhci_setup_host() performs resets before we've initialized CQHCI. This
> is the pattern followed in other SDHCI/CQHCI drivers.

Did you consider just checking host->mmc->cqe_private like
sdhci_cqhci_reset() ?

> 
> Fixes: 84362d79f436 ("mmc: sdhci-of-arasan: Add CQHCI support for arasan,sdhci-5.1")
> Cc: <stable@vger.kernel.org>
> Signed-off-by: Brian Norris <briannorris@chromium.org>
> ---
> 
>  drivers/mmc/host/sdhci-of-arasan.c | 17 +++++++++++------
>  1 file changed, 11 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c
> index 3997cad1f793..1988a703781a 100644
> --- a/drivers/mmc/host/sdhci-of-arasan.c
> +++ b/drivers/mmc/host/sdhci-of-arasan.c
> @@ -366,6 +366,10 @@ static void sdhci_arasan_reset(struct sdhci_host *host, u8 mask)
>  	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
>  	struct sdhci_arasan_data *sdhci_arasan = sdhci_pltfm_priv(pltfm_host);
>  
> +	if ((host->mmc->caps2 & MMC_CAP2_CQE) && (mask & SDHCI_RESET_ALL) &&
> +	    sdhci_arasan->has_cqe)
> +		cqhci_deactivate(host->mmc);
> +
>  	sdhci_reset(host, mask);
>  
>  	if (sdhci_arasan->quirks & SDHCI_ARASAN_QUIRK_FORCE_CDTEST) {
> @@ -1521,7 +1525,8 @@ static int sdhci_arasan_register_sdclk(struct sdhci_arasan_data *sdhci_arasan,
>  	return 0;
>  }
>  
> -static int sdhci_arasan_add_host(struct sdhci_arasan_data *sdhci_arasan)
> +static int sdhci_arasan_add_host(struct sdhci_arasan_data *sdhci_arasan,
> +				 struct device_node *np)
>  {
>  	struct sdhci_host *host = sdhci_arasan->host;
>  	struct cqhci_host *cq_host;
> @@ -1549,6 +1554,10 @@ static int sdhci_arasan_add_host(struct sdhci_arasan_data *sdhci_arasan)
>  	if (dma64)
>  		cq_host->caps |= CQHCI_TASK_DESC_SZ_128;
>  
> +	host->mmc->caps2 |= MMC_CAP2_CQE;
> +	if (!of_property_read_bool(np, "disable-cqe-dcmd"))
> +		host->mmc->caps2 |= MMC_CAP2_CQE_DCMD;
> +
>  	ret = cqhci_init(cq_host, host->mmc, dma64);
>  	if (ret)
>  		goto cleanup;
> @@ -1705,13 +1714,9 @@ static int sdhci_arasan_probe(struct platform_device *pdev)
>  		host->mmc_host_ops.start_signal_voltage_switch =
>  					sdhci_arasan_voltage_switch;
>  		sdhci_arasan->has_cqe = true;
> -		host->mmc->caps2 |= MMC_CAP2_CQE;
> -
> -		if (!of_property_read_bool(np, "disable-cqe-dcmd"))
> -			host->mmc->caps2 |= MMC_CAP2_CQE_DCMD;
>  	}
>  
> -	ret = sdhci_arasan_add_host(sdhci_arasan);
> +	ret = sdhci_arasan_add_host(sdhci_arasan, np);
>  	if (ret)
>  		goto err_add_host;
>
  
Brian Norris Oct. 18, 2022, 4:59 p.m. UTC | #3
Hi Adrian,

On Tue, Oct 18, 2022 at 07:13:28PM +0300, Adrian Hunter wrote:
> On 18/10/22 06:57, Brian Norris wrote:
> > So like these other patches, deactivate CQHCI when resetting the
> > controller. Also, move around the DT/caps handling, because
> > sdhci_setup_host() performs resets before we've initialized CQHCI. This
> > is the pattern followed in other SDHCI/CQHCI drivers.
> 
> Did you consider just checking host->mmc->cqe_private like
> sdhci_cqhci_reset() ?

I did not, although I am doing so now.

My first thought is that this feels a bit too private. Is the host
driver supposed to be memorizing the details of the CQHCI layer?

But on the plus side, that would remove some contortions needed here
(and also in sdhci-brcmstb.c).

Here's another option I previously considered: teaching
cqhci_deactivate() to check cqe_private itself. That would have the same
benefits, while keeping the private details in cqhci-core.c. How do you
like that?

(Tiny downside: cqhci-core.c got its rename in v5.12, so backporting
this to -stable would get slightly more difficult.)

Brian
  
Adrian Hunter Oct. 18, 2022, 5:58 p.m. UTC | #4
On 18/10/22 19:59, Brian Norris wrote:
> Hi Adrian,
> 
> On Tue, Oct 18, 2022 at 07:13:28PM +0300, Adrian Hunter wrote:
>> On 18/10/22 06:57, Brian Norris wrote:
>>> So like these other patches, deactivate CQHCI when resetting the
>>> controller. Also, move around the DT/caps handling, because
>>> sdhci_setup_host() performs resets before we've initialized CQHCI. This
>>> is the pattern followed in other SDHCI/CQHCI drivers.
>>
>> Did you consider just checking host->mmc->cqe_private like
>> sdhci_cqhci_reset() ?
> 
> I did not, although I am doing so now.
> 
> My first thought is that this feels a bit too private. Is the host
> driver supposed to be memorizing the details of the CQHCI layer?

Some drivers need to access CQHCI registers and get the reference
to cqhci_host from cqe_private, so that is already accepted.

> 
> But on the plus side, that would remove some contortions needed here
> (and also in sdhci-brcmstb.c).
> 
> Here's another option I previously considered: teaching
> cqhci_deactivate() to check cqe_private itself. That would have the same
> benefits, while keeping the private details in cqhci-core.c. How do you
> like that?

I don't mind either way.

> 
> (Tiny downside: cqhci-core.c got its rename in v5.12, so backporting
> this to -stable would get slightly more difficult.)
> 
> Brian
  

Patch

diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c
index 3997cad1f793..1988a703781a 100644
--- a/drivers/mmc/host/sdhci-of-arasan.c
+++ b/drivers/mmc/host/sdhci-of-arasan.c
@@ -366,6 +366,10 @@  static void sdhci_arasan_reset(struct sdhci_host *host, u8 mask)
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	struct sdhci_arasan_data *sdhci_arasan = sdhci_pltfm_priv(pltfm_host);
 
+	if ((host->mmc->caps2 & MMC_CAP2_CQE) && (mask & SDHCI_RESET_ALL) &&
+	    sdhci_arasan->has_cqe)
+		cqhci_deactivate(host->mmc);
+
 	sdhci_reset(host, mask);
 
 	if (sdhci_arasan->quirks & SDHCI_ARASAN_QUIRK_FORCE_CDTEST) {
@@ -1521,7 +1525,8 @@  static int sdhci_arasan_register_sdclk(struct sdhci_arasan_data *sdhci_arasan,
 	return 0;
 }
 
-static int sdhci_arasan_add_host(struct sdhci_arasan_data *sdhci_arasan)
+static int sdhci_arasan_add_host(struct sdhci_arasan_data *sdhci_arasan,
+				 struct device_node *np)
 {
 	struct sdhci_host *host = sdhci_arasan->host;
 	struct cqhci_host *cq_host;
@@ -1549,6 +1554,10 @@  static int sdhci_arasan_add_host(struct sdhci_arasan_data *sdhci_arasan)
 	if (dma64)
 		cq_host->caps |= CQHCI_TASK_DESC_SZ_128;
 
+	host->mmc->caps2 |= MMC_CAP2_CQE;
+	if (!of_property_read_bool(np, "disable-cqe-dcmd"))
+		host->mmc->caps2 |= MMC_CAP2_CQE_DCMD;
+
 	ret = cqhci_init(cq_host, host->mmc, dma64);
 	if (ret)
 		goto cleanup;
@@ -1705,13 +1714,9 @@  static int sdhci_arasan_probe(struct platform_device *pdev)
 		host->mmc_host_ops.start_signal_voltage_switch =
 					sdhci_arasan_voltage_switch;
 		sdhci_arasan->has_cqe = true;
-		host->mmc->caps2 |= MMC_CAP2_CQE;
-
-		if (!of_property_read_bool(np, "disable-cqe-dcmd"))
-			host->mmc->caps2 |= MMC_CAP2_CQE_DCMD;
 	}
 
-	ret = sdhci_arasan_add_host(sdhci_arasan);
+	ret = sdhci_arasan_add_host(sdhci_arasan, np);
 	if (ret)
 		goto err_add_host;