[v15,22/23] PCI: starfive: Offload the NVMe timeout workaround to host drivers.

Message ID 20240218101921.113528-7-minda.chen@starfivetech.com
State New
Headers
Series [v15,01/23] dt-bindings: PCI: Add PLDA XpressRICH PCIe host common properties |

Commit Message

Minda Chen Feb. 18, 2024, 10:19 a.m. UTC
  From: Kevin Xie <kevin.xie@starfivetech.com>

As the Starfive JH7110 hardware can't keep two inbound post write in
order all the time, such as MSI messages and NVMe completions. If the
NVMe completion update later than the MSI, an NVMe IRQ handle will miss.

As a workaround, we will wait a while before going to the generic handle
here.

Verified with NVMe SSD, USB SSD, R8169 NIC.
The performance are stable and even higher after this patch.

Signed-off-by: Kevin Xie <kevin.xie@starfivetech.com>
Signed-off-by: Minda Chen <minda.chen@starfivetech.com>
---
 drivers/pci/controller/plda/pcie-plda-host.c | 12 ++++++++++++
 drivers/pci/controller/plda/pcie-plda.h      |  1 +
 drivers/pci/controller/plda/pcie-starfive.c  |  1 +
 3 files changed, 14 insertions(+)
  

Comments

Minda Chen Feb. 18, 2024, 10:23 a.m. UTC | #1
> 
> From: Kevin Xie <kevin.xie@starfivetech.com>
> 
> As the Starfive JH7110 hardware can't keep two inbound post write in order all
> the time, such as MSI messages and NVMe completions. If the NVMe
> completion update later than the MSI, an NVMe IRQ handle will miss.
> 
> As a workaround, we will wait a while before going to the generic handle here.
> 
> Verified with NVMe SSD, USB SSD, R8169 NIC.
> The performance are stable and even higher after this patch.
> 
> Signed-off-by: Kevin Xie <kevin.xie@starfivetech.com>
> Signed-off-by: Minda Chen <minda.chen@starfivetech.com>
> ---
>  drivers/pci/controller/plda/pcie-plda-host.c | 12 ++++++++++++
>  drivers/pci/controller/plda/pcie-plda.h      |  1 +
>  drivers/pci/controller/plda/pcie-starfive.c  |  1 +
>  3 files changed, 14 insertions(+)
> 
> diff --git a/drivers/pci/controller/plda/pcie-plda-host.c
> b/drivers/pci/controller/plda/pcie-plda-host.c
> index a18923d7cea6..9e077ddf45c0 100644
> --- a/drivers/pci/controller/plda/pcie-plda-host.c
> +++ b/drivers/pci/controller/plda/pcie-plda-host.c
> @@ -13,6 +13,7 @@
>  #include <linux/msi.h>
>  #include <linux/pci_regs.h>
>  #include <linux/pci-ecam.h>
> +#include <linux/delay.h>
> 
>  #include "pcie-plda.h"
> 
> @@ -44,6 +45,17 @@ static void plda_handle_msi(struct irq_desc *desc)
>  			       bridge_base_addr + ISTATUS_LOCAL);
>  		status = readl_relaxed(bridge_base_addr + ISTATUS_MSI);
>  		for_each_set_bit(bit, &status, msi->num_vectors) {
> +			/*
> +			 * As the Starfive JH7110 hardware can't keep two
> +			 * inbound post write in order all the time, such as
> +			 * MSI messages and NVMe completions.
> +			 * If the NVMe completion update later than the MSI,
> +			 * an NVMe IRQ handle will miss.
> +			 * As a workaround, we will wait a while before
> +			 * going to the generic handle here.
> +			 */
> +			if (port->msi_quirk_delay_us)
> +				udelay(port->msi_quirk_delay_us);
>  			ret = generic_handle_domain_irq(msi->dev_domain, bit);
>  			if (ret)
>  				dev_err_ratelimited(dev, "bad MSI IRQ %d\n", diff --git
> a/drivers/pci/controller/plda/pcie-plda.h
> b/drivers/pci/controller/plda/pcie-plda.h
> index 04e385758a2f..feccf285dfe8 100644
> --- a/drivers/pci/controller/plda/pcie-plda.h
> +++ b/drivers/pci/controller/plda/pcie-plda.h
> @@ -186,6 +186,7 @@ struct plda_pcie_rp {
>  	int msi_irq;
>  	int intx_irq;
>  	int num_events;
> +	u16 msi_quirk_delay_us;
>  };
> 
>  struct plda_event {
> diff --git a/drivers/pci/controller/plda/pcie-starfive.c
> b/drivers/pci/controller/plda/pcie-starfive.c
> index 9bb9f0e29565..5cfc30572b7f 100644
> --- a/drivers/pci/controller/plda/pcie-starfive.c
> +++ b/drivers/pci/controller/plda/pcie-starfive.c
> @@ -391,6 +391,7 @@ static int starfive_pcie_probe(struct platform_device
> *pdev)
> 
>  	plda->host_ops = &sf_host_ops;
>  	plda->num_events = PLDA_MAX_EVENT_NUM;
> +	plda->msi_quirk_delay_us = 1;
>  	/* mask doorbell event */
>  	plda->events_bitmap = GENMASK(PLDA_INT_EVENT_NUM - 1, 0)
>  			     & ~BIT(PLDA_AXI_DOORBELL)
> --
> 2.17.1

Hi Kevin Hilman
   Will you try this series patch? Maybe this patch can fix the NVMe timeout print issue.
If this patch can fix it, please notify us. Thanks
  

Patch

diff --git a/drivers/pci/controller/plda/pcie-plda-host.c b/drivers/pci/controller/plda/pcie-plda-host.c
index a18923d7cea6..9e077ddf45c0 100644
--- a/drivers/pci/controller/plda/pcie-plda-host.c
+++ b/drivers/pci/controller/plda/pcie-plda-host.c
@@ -13,6 +13,7 @@ 
 #include <linux/msi.h>
 #include <linux/pci_regs.h>
 #include <linux/pci-ecam.h>
+#include <linux/delay.h>
 
 #include "pcie-plda.h"
 
@@ -44,6 +45,17 @@  static void plda_handle_msi(struct irq_desc *desc)
 			       bridge_base_addr + ISTATUS_LOCAL);
 		status = readl_relaxed(bridge_base_addr + ISTATUS_MSI);
 		for_each_set_bit(bit, &status, msi->num_vectors) {
+			/*
+			 * As the Starfive JH7110 hardware can't keep two
+			 * inbound post write in order all the time, such as
+			 * MSI messages and NVMe completions.
+			 * If the NVMe completion update later than the MSI,
+			 * an NVMe IRQ handle will miss.
+			 * As a workaround, we will wait a while before
+			 * going to the generic handle here.
+			 */
+			if (port->msi_quirk_delay_us)
+				udelay(port->msi_quirk_delay_us);
 			ret = generic_handle_domain_irq(msi->dev_domain, bit);
 			if (ret)
 				dev_err_ratelimited(dev, "bad MSI IRQ %d\n",
diff --git a/drivers/pci/controller/plda/pcie-plda.h b/drivers/pci/controller/plda/pcie-plda.h
index 04e385758a2f..feccf285dfe8 100644
--- a/drivers/pci/controller/plda/pcie-plda.h
+++ b/drivers/pci/controller/plda/pcie-plda.h
@@ -186,6 +186,7 @@  struct plda_pcie_rp {
 	int msi_irq;
 	int intx_irq;
 	int num_events;
+	u16 msi_quirk_delay_us;
 };
 
 struct plda_event {
diff --git a/drivers/pci/controller/plda/pcie-starfive.c b/drivers/pci/controller/plda/pcie-starfive.c
index 9bb9f0e29565..5cfc30572b7f 100644
--- a/drivers/pci/controller/plda/pcie-starfive.c
+++ b/drivers/pci/controller/plda/pcie-starfive.c
@@ -391,6 +391,7 @@  static int starfive_pcie_probe(struct platform_device *pdev)
 
 	plda->host_ops = &sf_host_ops;
 	plda->num_events = PLDA_MAX_EVENT_NUM;
+	plda->msi_quirk_delay_us = 1;
 	/* mask doorbell event */
 	plda->events_bitmap = GENMASK(PLDA_INT_EVENT_NUM - 1, 0)
 			     & ~BIT(PLDA_AXI_DOORBELL)