[v2] PCI: mediatek-gen3: Assert MAC reset only if PHY reset also present

Message ID 20240229092449.580971-1-angelogioacchino.delregno@collabora.com
State New
Headers
Series [v2] PCI: mediatek-gen3: Assert MAC reset only if PHY reset also present |

Commit Message

AngeloGioacchino Del Regno Feb. 29, 2024, 9:24 a.m. UTC
  Some SoCs have two PCI-Express controllers: in the case of MT8195,
one of them is using a dedicated PHY, but the other uses a combo PHY
that is shared with USB and in that case the PHY cannot be reset
from the PCIe driver, or USB functionality will be unable to resume.

Resetting the PCIe MAC without also resetting the PHY will result in
a full system lockup at PCIe resume time and the only option to
resume operation is to hard reboot the system (with a PMIC cut-off).

To resolve this issue, check if we've got both a PHY and a MAC reset
and, if not, never assert resets at PM suspend time: in that case,
the link is still getting powered down as both the clocks and the
power domains will go down anyway.

Fixes: d537dc125f07 ("PCI: mediatek-gen3: Add system PM support")
Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
---

Changes in v2:
 - Rebased over next-20240229

 drivers/pci/controller/pcie-mediatek-gen3.c | 25 ++++++++++++++-------
 1 file changed, 17 insertions(+), 8 deletions(-)
  

Comments

Nícolas F. R. A. Prado Feb. 29, 2024, 4:20 p.m. UTC | #1
On Thu, Feb 29, 2024 at 10:24:49AM +0100, AngeloGioacchino Del Regno wrote:
> Some SoCs have two PCI-Express controllers: in the case of MT8195,
> one of them is using a dedicated PHY, but the other uses a combo PHY
> that is shared with USB and in that case the PHY cannot be reset
> from the PCIe driver, or USB functionality will be unable to resume.
> 
> Resetting the PCIe MAC without also resetting the PHY will result in
> a full system lockup at PCIe resume time and the only option to
> resume operation is to hard reboot the system (with a PMIC cut-off).
> 
> To resolve this issue, check if we've got both a PHY and a MAC reset
> and, if not, never assert resets at PM suspend time: in that case,
> the link is still getting powered down as both the clocks and the
> power domains will go down anyway.
> 
> Fixes: d537dc125f07 ("PCI: mediatek-gen3: Add system PM support")
> Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>

With this applied resume finally works on MT8195-Tomato! And I no longer see the
errors I mentioned in [1]. So,

Tested-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>

Thanks,
Nícolas

[1] https://lore.kernel.org/all/d8cfb804-e47a-471c-8bc0-e974ee045655@notapiano/
  
Jianjun Wang (王建军) March 1, 2024, 2:48 a.m. UTC | #2
Hi Angelo,

Thanks for your patch.

On Thu, 2024-02-29 at 10:24 +0100, AngeloGioacchino Del Regno wrote:
> Some SoCs have two PCI-Express controllers: in the case of MT8195,
> one of them is using a dedicated PHY, but the other uses a combo PHY
> that is shared with USB and in that case the PHY cannot be reset
> from the PCIe driver, or USB functionality will be unable to resume.
> 
> Resetting the PCIe MAC without also resetting the PHY will result in
> a full system lockup at PCIe resume time and the only option to
> resume operation is to hard reboot the system (with a PMIC cut-off).
> 
> To resolve this issue, check if we've got both a PHY and a MAC reset
> and, if not, never assert resets at PM suspend time: in that case,
> the link is still getting powered down as both the clocks and the
> power domains will go down anyway.
> 
> Fixes: d537dc125f07 ("PCI: mediatek-gen3: Add system PM support")
> Signed-off-by: AngeloGioacchino Del Regno <
> angelogioacchino.delregno@collabora.com>
> ---
> 
> Changes in v2:
>  - Rebased over next-20240229
> 
>  drivers/pci/controller/pcie-mediatek-gen3.c | 25 ++++++++++++++-----
> --
>  1 file changed, 17 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/pci/controller/pcie-mediatek-gen3.c
> b/drivers/pci/controller/pcie-mediatek-gen3.c
> index 975b3024fb08..99b5d7a49be1 100644
> --- a/drivers/pci/controller/pcie-mediatek-gen3.c
> +++ b/drivers/pci/controller/pcie-mediatek-gen3.c
> @@ -874,17 +874,26 @@ static int mtk_pcie_power_up(struct
> mtk_gen3_pcie *pcie)
>  	return err;
>  }
>  
> -static void mtk_pcie_power_down(struct mtk_gen3_pcie *pcie)
> +static void mtk_pcie_power_down(struct mtk_gen3_pcie *pcie, bool
> is_suspend)
>  {
> +	bool suspend_reset_supported = pcie->mac_reset && pcie-
> >phy_reset;
> +
>  	clk_bulk_disable_unprepare(pcie->num_clks, pcie->clks);
>  
>  	pm_runtime_put_sync(pcie->dev);
>  	pm_runtime_disable(pcie->dev);
> -	reset_control_assert(pcie->mac_reset);
> +
> +	/*
> +	 * Assert MAC reset only if we also got a PHY reset, otherwise
> +	 * the system will lockup at PM resume time.
> +	 */
> +	if (is_suspend && suspend_reset_supported)
> +		reset_control_assert(pcie->mac_reset);
>  
>  	phy_power_off(pcie->phy);
>  	phy_exit(pcie->phy);
> -	reset_control_assert(pcie->phy_reset);
> +	if (is_suspend && suspend_reset_supported)
> +		reset_control_assert(pcie->phy_reset);
>  }
>  
>  static int mtk_pcie_setup(struct mtk_gen3_pcie *pcie)
> @@ -920,7 +929,7 @@ static int mtk_pcie_setup(struct mtk_gen3_pcie
> *pcie)
>  	return 0;
>  
>  err_setup:
> -	mtk_pcie_power_down(pcie);
> +	mtk_pcie_power_down(pcie, false);
>  
>  	return err;
>  }
> @@ -951,7 +960,7 @@ static int mtk_pcie_probe(struct platform_device
> *pdev)
>  	err = pci_host_probe(host);
>  	if (err) {
>  		mtk_pcie_irq_teardown(pcie);
> -		mtk_pcie_power_down(pcie);
> +		mtk_pcie_power_down(pcie, false);
>  		return err;
>  	}
>  
> @@ -969,7 +978,7 @@ static void mtk_pcie_remove(struct
> platform_device *pdev)
>  	pci_unlock_rescan_remove();
>  
>  	mtk_pcie_irq_teardown(pcie);
> -	mtk_pcie_power_down(pcie);
> +	mtk_pcie_power_down(pcie, false);

Is there any reason not to reset the MAC and PHY when probe fails and
driver removing? Some SoCs may not have MTCMOS to cut off their power,
we need to assert the reset signal to save power in that case.

Thanks.

>  }
>  
>  static void mtk_pcie_irq_save(struct mtk_gen3_pcie *pcie)
> @@ -1044,7 +1053,7 @@ static int mtk_pcie_suspend_noirq(struct device
> *dev)
>  	dev_dbg(pcie->dev, "entered L2 states successfully");
>  
>  	mtk_pcie_irq_save(pcie);
> -	mtk_pcie_power_down(pcie);
> +	mtk_pcie_power_down(pcie, true);
>  
>  	return 0;
>  }
> @@ -1060,7 +1069,7 @@ static int mtk_pcie_resume_noirq(struct device
> *dev)
>  
>  	err = mtk_pcie_startup_port(pcie);
>  	if (err) {
> -		mtk_pcie_power_down(pcie);
> +		mtk_pcie_power_down(pcie, false);
>  		return err;
>  	}
>
  
Siddharth Vadapalli March 1, 2024, 9:55 a.m. UTC | #3
On Fri, Mar 01, 2024 at 10:06:33AM +0100, AngeloGioacchino Del Regno wrote:
> Il 01/03/24 07:42, Siddharth Vadapalli ha scritto:
> > On Thu, Feb 29, 2024 at 10:24:49AM +0100, AngeloGioacchino Del Regno wrote:
> > > Some SoCs have two PCI-Express controllers: in the case of MT8195,
> > > one of them is using a dedicated PHY, but the other uses a combo PHY
> > > that is shared with USB and in that case the PHY cannot be reset
> > > from the PCIe driver, or USB functionality will be unable to resume.
> > > 
> > > Resetting the PCIe MAC without also resetting the PHY will result in
> > > a full system lockup at PCIe resume time and the only option to
> > > resume operation is to hard reboot the system (with a PMIC cut-off).
> > > 
> > > To resolve this issue, check if we've got both a PHY and a MAC reset
> > > and, if not, never assert resets at PM suspend time: in that case,
> > > the link is still getting powered down as both the clocks and the
> > > power domains will go down anyway.
> > > 
> > > Fixes: d537dc125f07 ("PCI: mediatek-gen3: Add system PM support")
> > > Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
> > > ---
> > > 
> > > Changes in v2:
> > >   - Rebased over next-20240229
> > > 
> > >   drivers/pci/controller/pcie-mediatek-gen3.c | 25 ++++++++++++++-------
> > >   1 file changed, 17 insertions(+), 8 deletions(-)
> > > 
> > > diff --git a/drivers/pci/controller/pcie-mediatek-gen3.c b/drivers/pci/controller/pcie-mediatek-gen3.c
> > > index 975b3024fb08..99b5d7a49be1 100644
> > > --- a/drivers/pci/controller/pcie-mediatek-gen3.c
> > > +++ b/drivers/pci/controller/pcie-mediatek-gen3.c
> > > @@ -874,17 +874,26 @@ static int mtk_pcie_power_up(struct mtk_gen3_pcie *pcie)
> > >   	return err;
> > >   }
> > > -static void mtk_pcie_power_down(struct mtk_gen3_pcie *pcie)
> > > +static void mtk_pcie_power_down(struct mtk_gen3_pcie *pcie, bool is_suspend)
> > >   {
> > > +	bool suspend_reset_supported = pcie->mac_reset && pcie->phy_reset;
> > > +
> > >   	clk_bulk_disable_unprepare(pcie->num_clks, pcie->clks);
> > >   	pm_runtime_put_sync(pcie->dev);
> > >   	pm_runtime_disable(pcie->dev);
> > > -	reset_control_assert(pcie->mac_reset);
> > > +
> > > +	/*
> > > +	 * Assert MAC reset only if we also got a PHY reset, otherwise
> > > +	 * the system will lockup at PM resume time.
> > > +	 */
> > > +	if (is_suspend && suspend_reset_supported)
> > > +		reset_control_assert(pcie->mac_reset);
> > >   	phy_power_off(pcie->phy);
> > >   	phy_exit(pcie->phy);
> > 
> > Wouldn't this power off the shared PHY? Or will the PHY driver make this
> > NO-OP if the PHY is shared, in which case the above two statements could
> > be combined with the other statements in the:
> > if (is_suspend && suspend_reset_supported)
> > condition to get a single block of code that also combines the
> > reset_control_assert(pcie->phy_reset)
> > present below.
> > 
> 
> No, that'd be fine:
> 
> static int mtk_phy_power_off(struct phy *phy)
> {
> 	struct mtk_phy_instance *instance = phy_get_drvdata(phy);
> 	struct mtk_tphy *tphy = dev_get_drvdata(phy->dev.parent);
> 
> 	if (instance->type == PHY_TYPE_USB2)
> 		u2_phy_instance_power_off(tphy, instance);
> 	else if (instance->type == PHY_TYPE_PCIE)
> 		pcie_phy_instance_power_off(tphy, instance);
> 
> 	return 0;
> }
> 
> ...it's two different PHY instances that we're dealing with, here :-)

I didn't realize that it is handled separately. Thank you for clarifying!

Regards,
Siddharth.
  

Patch

diff --git a/drivers/pci/controller/pcie-mediatek-gen3.c b/drivers/pci/controller/pcie-mediatek-gen3.c
index 975b3024fb08..99b5d7a49be1 100644
--- a/drivers/pci/controller/pcie-mediatek-gen3.c
+++ b/drivers/pci/controller/pcie-mediatek-gen3.c
@@ -874,17 +874,26 @@  static int mtk_pcie_power_up(struct mtk_gen3_pcie *pcie)
 	return err;
 }
 
-static void mtk_pcie_power_down(struct mtk_gen3_pcie *pcie)
+static void mtk_pcie_power_down(struct mtk_gen3_pcie *pcie, bool is_suspend)
 {
+	bool suspend_reset_supported = pcie->mac_reset && pcie->phy_reset;
+
 	clk_bulk_disable_unprepare(pcie->num_clks, pcie->clks);
 
 	pm_runtime_put_sync(pcie->dev);
 	pm_runtime_disable(pcie->dev);
-	reset_control_assert(pcie->mac_reset);
+
+	/*
+	 * Assert MAC reset only if we also got a PHY reset, otherwise
+	 * the system will lockup at PM resume time.
+	 */
+	if (is_suspend && suspend_reset_supported)
+		reset_control_assert(pcie->mac_reset);
 
 	phy_power_off(pcie->phy);
 	phy_exit(pcie->phy);
-	reset_control_assert(pcie->phy_reset);
+	if (is_suspend && suspend_reset_supported)
+		reset_control_assert(pcie->phy_reset);
 }
 
 static int mtk_pcie_setup(struct mtk_gen3_pcie *pcie)
@@ -920,7 +929,7 @@  static int mtk_pcie_setup(struct mtk_gen3_pcie *pcie)
 	return 0;
 
 err_setup:
-	mtk_pcie_power_down(pcie);
+	mtk_pcie_power_down(pcie, false);
 
 	return err;
 }
@@ -951,7 +960,7 @@  static int mtk_pcie_probe(struct platform_device *pdev)
 	err = pci_host_probe(host);
 	if (err) {
 		mtk_pcie_irq_teardown(pcie);
-		mtk_pcie_power_down(pcie);
+		mtk_pcie_power_down(pcie, false);
 		return err;
 	}
 
@@ -969,7 +978,7 @@  static void mtk_pcie_remove(struct platform_device *pdev)
 	pci_unlock_rescan_remove();
 
 	mtk_pcie_irq_teardown(pcie);
-	mtk_pcie_power_down(pcie);
+	mtk_pcie_power_down(pcie, false);
 }
 
 static void mtk_pcie_irq_save(struct mtk_gen3_pcie *pcie)
@@ -1044,7 +1053,7 @@  static int mtk_pcie_suspend_noirq(struct device *dev)
 	dev_dbg(pcie->dev, "entered L2 states successfully");
 
 	mtk_pcie_irq_save(pcie);
-	mtk_pcie_power_down(pcie);
+	mtk_pcie_power_down(pcie, true);
 
 	return 0;
 }
@@ -1060,7 +1069,7 @@  static int mtk_pcie_resume_noirq(struct device *dev)
 
 	err = mtk_pcie_startup_port(pcie);
 	if (err) {
-		mtk_pcie_power_down(pcie);
+		mtk_pcie_power_down(pcie, false);
 		return err;
 	}