[v5,16/17] qcom: llcc/edac: Support polling mode for ECC handling

Message ID 20221228084028.46528-17-manivannan.sadhasivam@linaro.org
State New
Headers
Series Qcom: LLCC/EDAC: Fix base address used for LLCC banks |

Commit Message

Manivannan Sadhasivam Dec. 28, 2022, 8:40 a.m. UTC
  Not all Qcom platforms support IRQ mode for ECC handling. For those
platforms, the current EDAC driver will not be probed due to missing ECC
IRQ in devicetree.

So add support for polling mode so that the EDAC driver can be used on all
Qcom platforms supporting LLCC.

The polling delay of 5000ms is chosen based on Qcom downstream/vendor
driver.

Reported-by: Luca Weiss <luca.weiss@fairphone.com>
Tested-by: Luca Weiss <luca.weiss@fairphone.com>
Tested-by: Steev Klimaszewski <steev@kali.org> # Thinkpad X13s
Tested-by: Andrew Halaney <ahalaney@redhat.com> # sa8540p-ride
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
---
 drivers/edac/qcom_edac.c     | 37 +++++++++++++++++++++++++-----------
 drivers/soc/qcom/llcc-qcom.c | 13 ++++++-------
 2 files changed, 32 insertions(+), 18 deletions(-)
  

Comments

Borislav Petkov Jan. 14, 2023, 1:36 p.m. UTC | #1
On Wed, Dec 28, 2022 at 02:10:27PM +0530, Manivannan Sadhasivam wrote:
>  static int qcom_llcc_edac_probe(struct platform_device *pdev)
>  {
>  	struct llcc_drv_data *llcc_driv_data = pdev->dev.platform_data;
> @@ -355,22 +361,31 @@ static int qcom_llcc_edac_probe(struct platform_device *pdev)
>  	edev_ctl->ctl_name = "llcc";
>  	edev_ctl->panic_on_ue = LLCC_ERP_PANIC_ON_UE;
>  
> +	/* Check if LLCC driver has passed ECC IRQ */
> +	ecc_irq = llcc_driv_data->ecc_irq;
> +	if (ecc_irq > 0) {
> +		/* Use interrupt mode if IRQ is available */
> +		edac_op_state = EDAC_OPSTATE_INT;
> +	} else {
> +		/* Fall back to polling mode otherwise */
> +		edac_op_state = EDAC_OPSTATE_POLL;
> +		edev_ctl->poll_msec = ECC_POLL_MSEC;
> +		edev_ctl->edac_check = llcc_ecc_check;
> +	}
> +
>  	rc = edac_device_add_device(edev_ctl);
>  	if (rc)
>  		goto out_mem;
>  
>  	platform_set_drvdata(pdev, edev_ctl);
>  
> -	/* Request for ecc irq */
> -	ecc_irq = llcc_driv_data->ecc_irq;
> -	if (ecc_irq < 0) {
> -		rc = -ENODEV;
> -		goto out_dev;
> -	}
> -	rc = devm_request_irq(dev, ecc_irq, llcc_ecc_irq_handler,
> +	/* Request ECC IRQ if available */
> +	if (ecc_irq > 0) {
> +		rc = devm_request_irq(dev, ecc_irq, llcc_ecc_irq_handler,
>  			      IRQF_TRIGGER_HIGH, "llcc_ecc", edev_ctl);

You need to request the IRQ first and then set edac_op_state above. I.e., this
devm_request_irq() needs to move in the if (ecc_irq > 0) branch above.
  
Manivannan Sadhasivam Jan. 15, 2023, 4:08 a.m. UTC | #2
On Sat, Jan 14, 2023 at 02:36:16PM +0100, Borislav Petkov wrote:
> On Wed, Dec 28, 2022 at 02:10:27PM +0530, Manivannan Sadhasivam wrote:
> >  static int qcom_llcc_edac_probe(struct platform_device *pdev)
> >  {
> >  	struct llcc_drv_data *llcc_driv_data = pdev->dev.platform_data;
> > @@ -355,22 +361,31 @@ static int qcom_llcc_edac_probe(struct platform_device *pdev)
> >  	edev_ctl->ctl_name = "llcc";
> >  	edev_ctl->panic_on_ue = LLCC_ERP_PANIC_ON_UE;
> >  
> > +	/* Check if LLCC driver has passed ECC IRQ */
> > +	ecc_irq = llcc_driv_data->ecc_irq;
> > +	if (ecc_irq > 0) {
> > +		/* Use interrupt mode if IRQ is available */
> > +		edac_op_state = EDAC_OPSTATE_INT;
> > +	} else {
> > +		/* Fall back to polling mode otherwise */
> > +		edac_op_state = EDAC_OPSTATE_POLL;
> > +		edev_ctl->poll_msec = ECC_POLL_MSEC;
> > +		edev_ctl->edac_check = llcc_ecc_check;
> > +	}
> > +
> >  	rc = edac_device_add_device(edev_ctl);
> >  	if (rc)
> >  		goto out_mem;
> >  
> >  	platform_set_drvdata(pdev, edev_ctl);
> >  
> > -	/* Request for ecc irq */
> > -	ecc_irq = llcc_driv_data->ecc_irq;
> > -	if (ecc_irq < 0) {
> > -		rc = -ENODEV;
> > -		goto out_dev;
> > -	}
> > -	rc = devm_request_irq(dev, ecc_irq, llcc_ecc_irq_handler,
> > +	/* Request ECC IRQ if available */
> > +	if (ecc_irq > 0) {
> > +		rc = devm_request_irq(dev, ecc_irq, llcc_ecc_irq_handler,
> >  			      IRQF_TRIGGER_HIGH, "llcc_ecc", edev_ctl);
> 
> You need to request the IRQ first and then set edac_op_state above. I.e., this
> devm_request_irq() needs to move in the if (ecc_irq > 0) branch above.

May I know why? I also checked other drivers, most of them are doing the same.

Thanks,
Mani

> 
> -- 
> Regards/Gruss,
>     Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette
  
Borislav Petkov Jan. 16, 2023, 10:41 a.m. UTC | #3
On Sun, Jan 15, 2023 at 09:38:25AM +0530, Manivannan Sadhasivam wrote:
> > You need to request the IRQ first and then set edac_op_state above. I.e., this
> > devm_request_irq() needs to move in the if (ecc_irq > 0) branch above.
> 
> May I know why? I also checked other drivers, most of them are doing the same.

If the others do it, that doesn't mean it is clean.

What happens to edac_op_state if devm_request_irq() fails?

I know I know, the probe function will fail and the driver won't load but still,
this is sloppy. And it could come down to bite us later, when someone
reorganizes that function.

So, do all the error checking method determination - polling or interrupt - in
one place.  Something like this (totally untested ofc, pasting here the whole
thing to show what I mean):

static int qcom_llcc_edac_probe(struct platform_device *pdev)
{
        struct llcc_drv_data *llcc_driv_data = pdev->dev.platform_data;
        struct edac_device_ctl_info *edev_ctl;
        struct device *dev = &pdev->dev;
        int ecc_irq;
        int rc;

        rc = qcom_llcc_core_setup(llcc_driv_data->bcast_regmap);
        if (rc)
                return rc;

        /* Allocate edac control info */
        edev_ctl = edac_device_alloc_ctl_info(0, "qcom-llcc", 1, "bank",
                                              llcc_driv_data->num_banks, 1,
                                              NULL, 0,
                                              edac_device_alloc_index());

        if (!edev_ctl)
                return -ENOMEM;

        edev_ctl->dev = dev;
        edev_ctl->mod_name = dev_name(dev);
        edev_ctl->dev_name = dev_name(dev);
        edev_ctl->ctl_name = "llcc";
        edev_ctl->panic_on_ue = LLCC_ERP_PANIC_ON_UE;

        /* Check if LLCC driver has passed ECC IRQ */
        ecc_irq = llcc_driv_data->ecc_irq;
        if (ecc_irq > 0) {
                rc = devm_request_irq(dev, ecc_irq, llcc_ecc_irq_handler,
                                      IRQF_TRIGGER_HIGH, "llcc_ecc", edev_ctl);
                if (!rc) {
                        edac_op_state = EDAC_OPSTATE_INT;
                        goto irq_done;
                }
        }

        /* Fall back to polling mode otherwise */
        edev_ctl->poll_msec = ECC_POLL_MSEC;
        edev_ctl->edac_check = llcc_ecc_check;
        edac_op_state = EDAC_OPSTATE_POLL;

irq_done:
        rc = edac_device_add_device(edev_ctl);
        if (rc) {
                edac_device_free_ctl_info(edev_ctl);
                return rc;
        }

        platform_set_drvdata(pdev, edev_ctl);

        return rc;
}
  
Manivannan Sadhasivam Jan. 18, 2023, 3:08 p.m. UTC | #4
On Mon, Jan 16, 2023 at 11:41:26AM +0100, Borislav Petkov wrote:
> On Sun, Jan 15, 2023 at 09:38:25AM +0530, Manivannan Sadhasivam wrote:
> > > You need to request the IRQ first and then set edac_op_state above. I.e., this
> > > devm_request_irq() needs to move in the if (ecc_irq > 0) branch above.
> > 
> > May I know why? I also checked other drivers, most of them are doing the same.
> 
> If the others do it, that doesn't mean it is clean.
> 
> What happens to edac_op_state if devm_request_irq() fails?
> 
> I know I know, the probe function will fail and the driver won't load but still,
> this is sloppy. And it could come down to bite us later, when someone
> reorganizes that function.
> 

OK. I just wanted to know the reasoning behind it.

Thanks,
Mani

> So, do all the error checking method determination - polling or interrupt - in
> one place.  Something like this (totally untested ofc, pasting here the whole
> thing to show what I mean):
> 
> static int qcom_llcc_edac_probe(struct platform_device *pdev)
> {
>         struct llcc_drv_data *llcc_driv_data = pdev->dev.platform_data;
>         struct edac_device_ctl_info *edev_ctl;
>         struct device *dev = &pdev->dev;
>         int ecc_irq;
>         int rc;
> 
>         rc = qcom_llcc_core_setup(llcc_driv_data->bcast_regmap);
>         if (rc)
>                 return rc;
> 
>         /* Allocate edac control info */
>         edev_ctl = edac_device_alloc_ctl_info(0, "qcom-llcc", 1, "bank",
>                                               llcc_driv_data->num_banks, 1,
>                                               NULL, 0,
>                                               edac_device_alloc_index());
> 
>         if (!edev_ctl)
>                 return -ENOMEM;
> 
>         edev_ctl->dev = dev;
>         edev_ctl->mod_name = dev_name(dev);
>         edev_ctl->dev_name = dev_name(dev);
>         edev_ctl->ctl_name = "llcc";
>         edev_ctl->panic_on_ue = LLCC_ERP_PANIC_ON_UE;
> 
>         /* Check if LLCC driver has passed ECC IRQ */
>         ecc_irq = llcc_driv_data->ecc_irq;
>         if (ecc_irq > 0) {
>                 rc = devm_request_irq(dev, ecc_irq, llcc_ecc_irq_handler,
>                                       IRQF_TRIGGER_HIGH, "llcc_ecc", edev_ctl);
>                 if (!rc) {
>                         edac_op_state = EDAC_OPSTATE_INT;
>                         goto irq_done;
>                 }
>         }
> 
>         /* Fall back to polling mode otherwise */
>         edev_ctl->poll_msec = ECC_POLL_MSEC;
>         edev_ctl->edac_check = llcc_ecc_check;
>         edac_op_state = EDAC_OPSTATE_POLL;
> 
> irq_done:
>         rc = edac_device_add_device(edev_ctl);
>         if (rc) {
>                 edac_device_free_ctl_info(edev_ctl);
>                 return rc;
>         }
> 
>         platform_set_drvdata(pdev, edev_ctl);
> 
>         return rc;
> }
> 
> -- 
> Regards/Gruss,
>     Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette
  

Patch

diff --git a/drivers/edac/qcom_edac.c b/drivers/edac/qcom_edac.c
index 1d3cc1930a74..cfcdc35b0373 100644
--- a/drivers/edac/qcom_edac.c
+++ b/drivers/edac/qcom_edac.c
@@ -76,6 +76,8 @@ 
 #define DRP0_INTERRUPT_ENABLE           BIT(6)
 #define SB_DB_DRP_INTERRUPT_ENABLE      0x3
 
+#define ECC_POLL_MSEC			5000
+
 enum {
 	LLCC_DRAM_CE = 0,
 	LLCC_DRAM_UE,
@@ -283,8 +285,7 @@  dump_syn_reg(struct edac_device_ctl_info *edev_ctl, int err_type, u32 bank)
 	return ret;
 }
 
-static irqreturn_t
-llcc_ecc_irq_handler(int irq, void *edev_ctl)
+static irqreturn_t llcc_ecc_irq_handler(int irq, void *edev_ctl)
 {
 	struct edac_device_ctl_info *edac_dev_ctl = edev_ctl;
 	struct llcc_drv_data *drv = edac_dev_ctl->dev->platform_data;
@@ -328,6 +329,11 @@  llcc_ecc_irq_handler(int irq, void *edev_ctl)
 	return irq_rc;
 }
 
+static void llcc_ecc_check(struct edac_device_ctl_info *edev_ctl)
+{
+	llcc_ecc_irq_handler(0, edev_ctl);
+}
+
 static int qcom_llcc_edac_probe(struct platform_device *pdev)
 {
 	struct llcc_drv_data *llcc_driv_data = pdev->dev.platform_data;
@@ -355,22 +361,31 @@  static int qcom_llcc_edac_probe(struct platform_device *pdev)
 	edev_ctl->ctl_name = "llcc";
 	edev_ctl->panic_on_ue = LLCC_ERP_PANIC_ON_UE;
 
+	/* Check if LLCC driver has passed ECC IRQ */
+	ecc_irq = llcc_driv_data->ecc_irq;
+	if (ecc_irq > 0) {
+		/* Use interrupt mode if IRQ is available */
+		edac_op_state = EDAC_OPSTATE_INT;
+	} else {
+		/* Fall back to polling mode otherwise */
+		edac_op_state = EDAC_OPSTATE_POLL;
+		edev_ctl->poll_msec = ECC_POLL_MSEC;
+		edev_ctl->edac_check = llcc_ecc_check;
+	}
+
 	rc = edac_device_add_device(edev_ctl);
 	if (rc)
 		goto out_mem;
 
 	platform_set_drvdata(pdev, edev_ctl);
 
-	/* Request for ecc irq */
-	ecc_irq = llcc_driv_data->ecc_irq;
-	if (ecc_irq < 0) {
-		rc = -ENODEV;
-		goto out_dev;
-	}
-	rc = devm_request_irq(dev, ecc_irq, llcc_ecc_irq_handler,
+	/* Request ECC IRQ if available */
+	if (ecc_irq > 0) {
+		rc = devm_request_irq(dev, ecc_irq, llcc_ecc_irq_handler,
 			      IRQF_TRIGGER_HIGH, "llcc_ecc", edev_ctl);
-	if (rc)
-		goto out_dev;
+		if (rc)
+			goto out_dev;
+	}
 
 	return rc;
 
diff --git a/drivers/soc/qcom/llcc-qcom.c b/drivers/soc/qcom/llcc-qcom.c
index 72f3f2a9aaa0..7b7c5a38bac6 100644
--- a/drivers/soc/qcom/llcc-qcom.c
+++ b/drivers/soc/qcom/llcc-qcom.c
@@ -1011,13 +1011,12 @@  static int qcom_llcc_probe(struct platform_device *pdev)
 		goto err;
 
 	drv_data->ecc_irq = platform_get_irq_optional(pdev, 0);
-	if (drv_data->ecc_irq >= 0) {
-		llcc_edac = platform_device_register_data(&pdev->dev,
-						"qcom_llcc_edac", -1, drv_data,
-						sizeof(*drv_data));
-		if (IS_ERR(llcc_edac))
-			dev_err(dev, "Failed to register llcc edac driver\n");
-	}
+
+	llcc_edac = platform_device_register_data(&pdev->dev,
+					"qcom_llcc_edac", -1, drv_data,
+					sizeof(*drv_data));
+	if (IS_ERR(llcc_edac))
+		dev_err(dev, "Failed to register llcc edac driver\n");
 
 	return 0;
 err: