[net,v4] net: stmmac: xgmac: fix handling of DPP safety error for DMA channels

Message ID 20240203051439.1127090-1-0x1207@gmail.com
State New
Headers
Series [net,v4] net: stmmac: xgmac: fix handling of DPP safety error for DMA channels |

Commit Message

Furong Xu Feb. 3, 2024, 5:14 a.m. UTC
  Commit 56e58d6c8a56 ("net: stmmac: Implement Safety Features in
XGMAC core") checks and reports safety errors, but leaves the
Data Path Parity Errors for each DMA channel completely unhandled, leading to
an interrupt storm.
Fix it by checking and clearing the DMA_DPP_Interrupt_Status register.

Fixes: 56e58d6c8a56 ("net: stmmac: Implement Safety Features in XGMAC core")
Signed-off-by: Furong Xu <0x1207@gmail.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Serge Semin <fancer.lancer@gmail.com>
---
Changes in v4:
 - fix a typo in the name of the DDPP bit, thanks Serge Semin

Changes in v3:
 - code style fix, thanks Paolo Abeni

Changes in v2:
  - explicitly enable Data Path Parity Protection
  - add new counters to stmmac_safety_stats
  - add detailed log
---
 drivers/net/ethernet/stmicro/stmmac/common.h  |  1 +
 .../net/ethernet/stmicro/stmmac/dwxgmac2.h    |  3 +
 .../ethernet/stmicro/stmmac/dwxgmac2_core.c   | 57 ++++++++++++++++++-
 3 files changed, 60 insertions(+), 1 deletion(-)
  

Comments

Simon Horman Feb. 8, 2024, 9:26 a.m. UTC | #1
On Wed, Feb 07, 2024 at 11:56:26AM +0000, Jon Hunter wrote:
> 
> On 03/02/2024 05:14, Furong Xu wrote:
> > Commit 56e58d6c8a56 ("net: stmmac: Implement Safety Features in
> > XGMAC core") checks and reports safety errors, but leaves the
> > Data Path Parity Errors for each channel in DMA unhandled at all, lead to
> > a storm of interrupt.
> > Fix it by checking and clearing the DMA_DPP_Interrupt_Status register.
> > 
> > Fixes: 56e58d6c8a56 ("net: stmmac: Implement Safety Features in XGMAC core")
> > Signed-off-by: Furong Xu <0x1207@gmail.com>
> > Reviewed-by: Simon Horman <horms@kernel.org>
> > Reviewed-by: Serge Semin <fancer.lancer@gmail.com>
> > ---
> > Changes in v4:
> >   - fix a typo name of DDPP bit, thanks Serge Semin
> > 
> > Changes in v3:
> >   - code style fix, thanks Paolo Abeni
> > 
> > Changes in v2:
> >    - explicit enable Data Path Parity Protection
> >    - add new counters to stmmac_safety_stats
> >    - add detailed log
> > ---
> >   drivers/net/ethernet/stmicro/stmmac/common.h  |  1 +
> >   .../net/ethernet/stmicro/stmmac/dwxgmac2.h    |  3 +
> >   .../ethernet/stmicro/stmmac/dwxgmac2_core.c   | 57 ++++++++++++++++++-
> >   3 files changed, 60 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
> > index 721c1f8e892f..b4f60ab078d6 100644
> > --- a/drivers/net/ethernet/stmicro/stmmac/common.h
> > +++ b/drivers/net/ethernet/stmicro/stmmac/common.h
> > @@ -216,6 +216,7 @@ struct stmmac_safety_stats {
> >   	unsigned long mac_errors[32];
> >   	unsigned long mtl_errors[32];
> >   	unsigned long dma_errors[32];
> > +	unsigned long dma_dpp_errors[32];
> >   };
> >   /* Number of fields in Safety Stats */
> > diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
> > index 207ff1799f2c..5c67a3f89f08 100644
> > --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
> > +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
> > @@ -303,6 +303,8 @@
> >   #define XGMAC_RXCEIE			BIT(4)
> >   #define XGMAC_TXCEIE			BIT(0)
> >   #define XGMAC_MTL_ECC_INT_STATUS	0x000010cc
> > +#define XGMAC_MTL_DPP_CONTROL		0x000010e0
> > +#define XGMAC_DPP_DISABLE		BIT(0)
> >   #define XGMAC_MTL_TXQ_OPMODE(x)		(0x00001100 + (0x80 * (x)))
> >   #define XGMAC_TQS			GENMASK(25, 16)
> >   #define XGMAC_TQS_SHIFT			16
> > @@ -385,6 +387,7 @@
> >   #define XGMAC_DCEIE			BIT(1)
> >   #define XGMAC_TCEIE			BIT(0)
> >   #define XGMAC_DMA_ECC_INT_STATUS	0x0000306c
> > +#define XGMAC_DMA_DPP_INT_STATUS	0x00003074
> >   #define XGMAC_DMA_CH_CONTROL(x)		(0x00003100 + (0x80 * (x)))
> >   #define XGMAC_SPH			BIT(24)
> >   #define XGMAC_PBLx8			BIT(16)
> > diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
> > index eb48211d9b0e..04d7c4dc2e35 100644
> > --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
> > +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
> > @@ -830,6 +830,43 @@ static const struct dwxgmac3_error_desc dwxgmac3_dma_errors[32]= {
> >   	{ false, "UNKNOWN", "Unknown Error" }, /* 31 */
> >   };
> > +static const char * const dpp_rx_err = "Read Rx Descriptor Parity checker Error";
> > +static const char * const dpp_tx_err = "Read Tx Descriptor Parity checker Error";
> > +static const struct dwxgmac3_error_desc dwxgmac3_dma_dpp_errors[32] = {
> > +	{ true, "TDPES0", dpp_tx_err },
> > +	{ true, "TDPES1", dpp_tx_err },
> > +	{ true, "TDPES2", dpp_tx_err },
> > +	{ true, "TDPES3", dpp_tx_err },
> > +	{ true, "TDPES4", dpp_tx_err },
> > +	{ true, "TDPES5", dpp_tx_err },
> > +	{ true, "TDPES6", dpp_tx_err },
> > +	{ true, "TDPES7", dpp_tx_err },
> > +	{ true, "TDPES8", dpp_tx_err },
> > +	{ true, "TDPES9", dpp_tx_err },
> > +	{ true, "TDPES10", dpp_tx_err },
> > +	{ true, "TDPES11", dpp_tx_err },
> > +	{ true, "TDPES12", dpp_tx_err },
> > +	{ true, "TDPES13", dpp_tx_err },
> > +	{ true, "TDPES14", dpp_tx_err },
> > +	{ true, "TDPES15", dpp_tx_err },
> > +	{ true, "RDPES0", dpp_rx_err },
> > +	{ true, "RDPES1", dpp_rx_err },
> > +	{ true, "RDPES2", dpp_rx_err },
> > +	{ true, "RDPES3", dpp_rx_err },
> > +	{ true, "RDPES4", dpp_rx_err },
> > +	{ true, "RDPES5", dpp_rx_err },
> > +	{ true, "RDPES6", dpp_rx_err },
> > +	{ true, "RDPES7", dpp_rx_err },
> > +	{ true, "RDPES8", dpp_rx_err },
> > +	{ true, "RDPES9", dpp_rx_err },
> > +	{ true, "RDPES10", dpp_rx_err },
> > +	{ true, "RDPES11", dpp_rx_err },
> > +	{ true, "RDPES12", dpp_rx_err },
> > +	{ true, "RDPES13", dpp_rx_err },
> > +	{ true, "RDPES14", dpp_rx_err },
> > +	{ true, "RDPES15", dpp_rx_err },
> > +};
> > +
> >   static void dwxgmac3_handle_dma_err(struct net_device *ndev,
> >   				    void __iomem *ioaddr, bool correctable,
> >   				    struct stmmac_safety_stats *stats)
> > @@ -841,6 +878,13 @@ static void dwxgmac3_handle_dma_err(struct net_device *ndev,
> >   	dwxgmac3_log_error(ndev, value, correctable, "DMA",
> >   			   dwxgmac3_dma_errors, STAT_OFF(dma_errors), stats);
> > +
> > +	value = readl(ioaddr + XGMAC_DMA_DPP_INT_STATUS);
> > +	writel(value, ioaddr + XGMAC_DMA_DPP_INT_STATUS);
> > +
> > +	dwxgmac3_log_error(ndev, value, false, "DMA_DPP",
> > +			   dwxgmac3_dma_dpp_errors,
> > +			   STAT_OFF(dma_dpp_errors), stats);
> >   }
> >   static int
> > @@ -881,6 +925,12 @@ dwxgmac3_safety_feat_config(void __iomem *ioaddr, unsigned int asp,
> >   	value |= XGMAC_TMOUTEN; /* FSM Timeout Feature */
> >   	writel(value, ioaddr + XGMAC_MAC_FSM_CONTROL);
> > +	/* 5. Enable Data Path Parity Protection */
> > +	value = readl(ioaddr + XGMAC_MTL_DPP_CONTROL);
> > +	/* already enabled by default, explicit enable it again */
> > +	value &= ~XGMAC_DPP_DISABLE;
> > +	writel(value, ioaddr + XGMAC_MTL_DPP_CONTROL);
> > +
> >   	return 0;
> >   }
> > @@ -914,7 +964,11 @@ static int dwxgmac3_safety_feat_irq_status(struct net_device *ndev,
> >   		ret |= !corr;
> >   	}
> > -	err = dma & (XGMAC_DEUIS | XGMAC_DECIS);
> > +	/* DMA_DPP_Interrupt_Status is indicated by MCSIS bit in
> > +	 * DMA_Safety_Interrupt_Status, so we handle DMA Data Path
> > +	 * Parity Errors here
> > +	 */
> > +	err = dma & (XGMAC_DEUIS | XGMAC_DECIS | XGMAC_MCSIS);
> >   	corr = dma & XGMAC_DECIS;
> >   	if (err) {
> >   		dwxgmac3_handle_dma_err(ndev, ioaddr, corr, stats);
> > @@ -930,6 +984,7 @@ static const struct dwxgmac3_error {
> >   	{ dwxgmac3_mac_errors },
> >   	{ dwxgmac3_mtl_errors },
> >   	{ dwxgmac3_dma_errors },
> > +	{ dwxgmac3_dma_dpp_errors },
> >   };
> >   static int dwxgmac3_safety_feat_dump(struct stmmac_safety_stats *stats,
> 
> 
> This change is breaking the build on some of our builders that are still using GCC 6.x ...
> 
> drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c:836:20: error: initialiser element is not constant
>   { true, "TDPES0", dpp_tx_err },
>                     ^~~~~~~~~~
> drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c:836:20: note: (near initialisation for ‘dwxgmac3_dma_dpp_errors[0].detailed_desc’)
> drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c:837:20: error: initialiser element is not constant
>   { true, "TDPES1", dpp_tx_err },
>                     ^~~~~~~~~~
> drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c:837:20: note: (near initialisation for ‘dwxgmac3_dma_dpp_errors[1].detailed_desc’)
> ...
> 
> I know that this is quite old but the minimum supported by the kernel is v5.1 ...
> 
> https://www.kernel.org/doc/html/next/process/changes.html

Thanks Jon,

I separately received a notification about this occurring with gcc 7.

https://lore.kernel.org/oe-kbuild-all/202402081135.lAxxBXHk-lkp@intel.com/

It is unclear to me why this occurs, as dpp_rx_err and dpp_tx_err are const.
But I do seem to be able to address this problem by using #defines for
these values instead.

I plan to post a patch shortly.
  
Simon Horman Feb. 8, 2024, 9:53 a.m. UTC | #2
On Thu, Feb 08, 2024 at 09:26:27AM +0000, Simon Horman wrote:
> On Wed, Feb 07, 2024 at 11:56:26AM +0000, Jon Hunter wrote:
> > 
> > On 03/02/2024 05:14, Furong Xu wrote:
> > > Commit 56e58d6c8a56 ("net: stmmac: Implement Safety Features in
> > > XGMAC core") checks and reports safety errors, but leaves the
> > > Data Path Parity Errors for each channel in DMA unhandled at all, lead to
> > > a storm of interrupt.
> > > Fix it by checking and clearing the DMA_DPP_Interrupt_Status register.
> > > 
> > > Fixes: 56e58d6c8a56 ("net: stmmac: Implement Safety Features in XGMAC core")
> > > Signed-off-by: Furong Xu <0x1207@gmail.com>
> > > Reviewed-by: Simon Horman <horms@kernel.org>
> > > Reviewed-by: Serge Semin <fancer.lancer@gmail.com>
> > > ---
> > > Changes in v4:
> > >   - fix a typo name of DDPP bit, thanks Serge Semin
> > > 
> > > Changes in v3:
> > >   - code style fix, thanks Paolo Abeni
> > > 
> > > Changes in v2:
> > >    - explicit enable Data Path Parity Protection
> > >    - add new counters to stmmac_safety_stats
> > >    - add detailed log
> > > ---
> > >   drivers/net/ethernet/stmicro/stmmac/common.h  |  1 +
> > >   .../net/ethernet/stmicro/stmmac/dwxgmac2.h    |  3 +
> > >   .../ethernet/stmicro/stmmac/dwxgmac2_core.c   | 57 ++++++++++++++++++-
> > >   3 files changed, 60 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
> > > index 721c1f8e892f..b4f60ab078d6 100644
> > > --- a/drivers/net/ethernet/stmicro/stmmac/common.h
> > > +++ b/drivers/net/ethernet/stmicro/stmmac/common.h
> > > @@ -216,6 +216,7 @@ struct stmmac_safety_stats {
> > >   	unsigned long mac_errors[32];
> > >   	unsigned long mtl_errors[32];
> > >   	unsigned long dma_errors[32];
> > > +	unsigned long dma_dpp_errors[32];
> > >   };
> > >   /* Number of fields in Safety Stats */
> > > diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
> > > index 207ff1799f2c..5c67a3f89f08 100644
> > > --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
> > > +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
> > > @@ -303,6 +303,8 @@
> > >   #define XGMAC_RXCEIE			BIT(4)
> > >   #define XGMAC_TXCEIE			BIT(0)
> > >   #define XGMAC_MTL_ECC_INT_STATUS	0x000010cc
> > > +#define XGMAC_MTL_DPP_CONTROL		0x000010e0
> > > +#define XGMAC_DPP_DISABLE		BIT(0)
> > >   #define XGMAC_MTL_TXQ_OPMODE(x)		(0x00001100 + (0x80 * (x)))
> > >   #define XGMAC_TQS			GENMASK(25, 16)
> > >   #define XGMAC_TQS_SHIFT			16
> > > @@ -385,6 +387,7 @@
> > >   #define XGMAC_DCEIE			BIT(1)
> > >   #define XGMAC_TCEIE			BIT(0)
> > >   #define XGMAC_DMA_ECC_INT_STATUS	0x0000306c
> > > +#define XGMAC_DMA_DPP_INT_STATUS	0x00003074
> > >   #define XGMAC_DMA_CH_CONTROL(x)		(0x00003100 + (0x80 * (x)))
> > >   #define XGMAC_SPH			BIT(24)
> > >   #define XGMAC_PBLx8			BIT(16)
> > > diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
> > > index eb48211d9b0e..04d7c4dc2e35 100644
> > > --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
> > > +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
> > > @@ -830,6 +830,43 @@ static const struct dwxgmac3_error_desc dwxgmac3_dma_errors[32]= {
> > >   	{ false, "UNKNOWN", "Unknown Error" }, /* 31 */
> > >   };
> > > +static const char * const dpp_rx_err = "Read Rx Descriptor Parity checker Error";
> > > +static const char * const dpp_tx_err = "Read Tx Descriptor Parity checker Error";
> > > +static const struct dwxgmac3_error_desc dwxgmac3_dma_dpp_errors[32] = {
> > > +	{ true, "TDPES0", dpp_tx_err },
> > > +	{ true, "TDPES1", dpp_tx_err },
> > > +	{ true, "TDPES2", dpp_tx_err },
> > > +	{ true, "TDPES3", dpp_tx_err },
> > > +	{ true, "TDPES4", dpp_tx_err },
> > > +	{ true, "TDPES5", dpp_tx_err },
> > > +	{ true, "TDPES6", dpp_tx_err },
> > > +	{ true, "TDPES7", dpp_tx_err },
> > > +	{ true, "TDPES8", dpp_tx_err },
> > > +	{ true, "TDPES9", dpp_tx_err },
> > > +	{ true, "TDPES10", dpp_tx_err },
> > > +	{ true, "TDPES11", dpp_tx_err },
> > > +	{ true, "TDPES12", dpp_tx_err },
> > > +	{ true, "TDPES13", dpp_tx_err },
> > > +	{ true, "TDPES14", dpp_tx_err },
> > > +	{ true, "TDPES15", dpp_tx_err },
> > > +	{ true, "RDPES0", dpp_rx_err },
> > > +	{ true, "RDPES1", dpp_rx_err },
> > > +	{ true, "RDPES2", dpp_rx_err },
> > > +	{ true, "RDPES3", dpp_rx_err },
> > > +	{ true, "RDPES4", dpp_rx_err },
> > > +	{ true, "RDPES5", dpp_rx_err },
> > > +	{ true, "RDPES6", dpp_rx_err },
> > > +	{ true, "RDPES7", dpp_rx_err },
> > > +	{ true, "RDPES8", dpp_rx_err },
> > > +	{ true, "RDPES9", dpp_rx_err },
> > > +	{ true, "RDPES10", dpp_rx_err },
> > > +	{ true, "RDPES11", dpp_rx_err },
> > > +	{ true, "RDPES12", dpp_rx_err },
> > > +	{ true, "RDPES13", dpp_rx_err },
> > > +	{ true, "RDPES14", dpp_rx_err },
> > > +	{ true, "RDPES15", dpp_rx_err },
> > > +};
> > > +
> > >   static void dwxgmac3_handle_dma_err(struct net_device *ndev,
> > >   				    void __iomem *ioaddr, bool correctable,
> > >   				    struct stmmac_safety_stats *stats)
> > > @@ -841,6 +878,13 @@ static void dwxgmac3_handle_dma_err(struct net_device *ndev,
> > >   	dwxgmac3_log_error(ndev, value, correctable, "DMA",
> > >   			   dwxgmac3_dma_errors, STAT_OFF(dma_errors), stats);
> > > +
> > > +	value = readl(ioaddr + XGMAC_DMA_DPP_INT_STATUS);
> > > +	writel(value, ioaddr + XGMAC_DMA_DPP_INT_STATUS);
> > > +
> > > +	dwxgmac3_log_error(ndev, value, false, "DMA_DPP",
> > > +			   dwxgmac3_dma_dpp_errors,
> > > +			   STAT_OFF(dma_dpp_errors), stats);
> > >   }
> > >   static int
> > > @@ -881,6 +925,12 @@ dwxgmac3_safety_feat_config(void __iomem *ioaddr, unsigned int asp,
> > >   	value |= XGMAC_TMOUTEN; /* FSM Timeout Feature */
> > >   	writel(value, ioaddr + XGMAC_MAC_FSM_CONTROL);
> > > +	/* 5. Enable Data Path Parity Protection */
> > > +	value = readl(ioaddr + XGMAC_MTL_DPP_CONTROL);
> > > +	/* already enabled by default, explicit enable it again */
> > > +	value &= ~XGMAC_DPP_DISABLE;
> > > +	writel(value, ioaddr + XGMAC_MTL_DPP_CONTROL);
> > > +
> > >   	return 0;
> > >   }
> > > @@ -914,7 +964,11 @@ static int dwxgmac3_safety_feat_irq_status(struct net_device *ndev,
> > >   		ret |= !corr;
> > >   	}
> > > -	err = dma & (XGMAC_DEUIS | XGMAC_DECIS);
> > > +	/* DMA_DPP_Interrupt_Status is indicated by MCSIS bit in
> > > +	 * DMA_Safety_Interrupt_Status, so we handle DMA Data Path
> > > +	 * Parity Errors here
> > > +	 */
> > > +	err = dma & (XGMAC_DEUIS | XGMAC_DECIS | XGMAC_MCSIS);
> > >   	corr = dma & XGMAC_DECIS;
> > >   	if (err) {
> > >   		dwxgmac3_handle_dma_err(ndev, ioaddr, corr, stats);
> > > @@ -930,6 +984,7 @@ static const struct dwxgmac3_error {
> > >   	{ dwxgmac3_mac_errors },
> > >   	{ dwxgmac3_mtl_errors },
> > >   	{ dwxgmac3_dma_errors },
> > > +	{ dwxgmac3_dma_dpp_errors },
> > >   };
> > >   static int dwxgmac3_safety_feat_dump(struct stmmac_safety_stats *stats,
> > 
> > 
> > This change is breaking the build on some of our builders that are still using GCC 6.x ...
> > 
> > drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c:836:20: error: initialiser element is not constant
> >   { true, "TDPES0", dpp_tx_err },
> >                     ^~~~~~~~~~
> > drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c:836:20: note: (near initialisation for ‘dwxgmac3_dma_dpp_errors[0].detailed_desc’)
> > drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c:837:20: error: initialiser element is not constant
> >   { true, "TDPES1", dpp_tx_err },
> >                     ^~~~~~~~~~
> > drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c:837:20: note: (near initialisation for ‘dwxgmac3_dma_dpp_errors[1].detailed_desc’)
> > ...
> > 
> > I know that this is quite old but the minimum supported by the kernel is v5.1 ...
> > 
> > https://www.kernel.org/doc/html/next/process/changes.html
> 
> Thanks Jon,
> 
> I separately received a notification about this occurring with gcc 7.
> 
> https://lore.kernel.org/oe-kbuild-all/202402081135.lAxxBXHk-lkp@intel.com/
> 
> It is unclear to me why this occurs, as dpp_tx_err and dpp_tx_err are const.
> But I do seem to be able to address this problem by using #defines for
> these values instead.
> 
> I plan to post a patch shortly.

Patch posted:
- [PATCH net] net: stmmac: xgmac: use #define for string constants
  https://lore.kernel.org/netdev/20240208-xgmac-const-v1-1-e69a1eeabfc8@kernel.org/
  
Jon Hunter Feb. 8, 2024, 1:11 p.m. UTC | #3
On 08/02/2024 09:53, Simon Horman wrote:
> On Thu, Feb 08, 2024 at 09:26:27AM +0000, Simon Horman wrote:
>> On Wed, Feb 07, 2024 at 11:56:26AM +0000, Jon Hunter wrote:
>>>
>>> On 03/02/2024 05:14, Furong Xu wrote:
>>>> Commit 56e58d6c8a56 ("net: stmmac: Implement Safety Features in
>>>> XGMAC core") checks and reports safety errors, but leaves the
>>>> Data Path Parity Errors for each channel in DMA unhandled at all, lead to
>>>> a storm of interrupt.
>>>> Fix it by checking and clearing the DMA_DPP_Interrupt_Status register.
>>>>
>>>> Fixes: 56e58d6c8a56 ("net: stmmac: Implement Safety Features in XGMAC core")
>>>> Signed-off-by: Furong Xu <0x1207@gmail.com>
>>>> Reviewed-by: Simon Horman <horms@kernel.org>
>>>> Reviewed-by: Serge Semin <fancer.lancer@gmail.com>
>>>> ---
>>>> Changes in v4:
>>>>    - fix a typo name of DDPP bit, thanks Serge Semin
>>>>
>>>> Changes in v3:
>>>>    - code style fix, thanks Paolo Abeni
>>>>
>>>> Changes in v2:
>>>>     - explicit enable Data Path Parity Protection
>>>>     - add new counters to stmmac_safety_stats
>>>>     - add detailed log
>>>> ---
>>>>    drivers/net/ethernet/stmicro/stmmac/common.h  |  1 +
>>>>    .../net/ethernet/stmicro/stmmac/dwxgmac2.h    |  3 +
>>>>    .../ethernet/stmicro/stmmac/dwxgmac2_core.c   | 57 ++++++++++++++++++-
>>>>    3 files changed, 60 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
>>>> index 721c1f8e892f..b4f60ab078d6 100644
>>>> --- a/drivers/net/ethernet/stmicro/stmmac/common.h
>>>> +++ b/drivers/net/ethernet/stmicro/stmmac/common.h
>>>> @@ -216,6 +216,7 @@ struct stmmac_safety_stats {
>>>>    	unsigned long mac_errors[32];
>>>>    	unsigned long mtl_errors[32];
>>>>    	unsigned long dma_errors[32];
>>>> +	unsigned long dma_dpp_errors[32];
>>>>    };
>>>>    /* Number of fields in Safety Stats */
>>>> diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
>>>> index 207ff1799f2c..5c67a3f89f08 100644
>>>> --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
>>>> +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
>>>> @@ -303,6 +303,8 @@
>>>>    #define XGMAC_RXCEIE			BIT(4)
>>>>    #define XGMAC_TXCEIE			BIT(0)
>>>>    #define XGMAC_MTL_ECC_INT_STATUS	0x000010cc
>>>> +#define XGMAC_MTL_DPP_CONTROL		0x000010e0
>>>> +#define XGMAC_DPP_DISABLE		BIT(0)
>>>>    #define XGMAC_MTL_TXQ_OPMODE(x)		(0x00001100 + (0x80 * (x)))
>>>>    #define XGMAC_TQS			GENMASK(25, 16)
>>>>    #define XGMAC_TQS_SHIFT			16
>>>> @@ -385,6 +387,7 @@
>>>>    #define XGMAC_DCEIE			BIT(1)
>>>>    #define XGMAC_TCEIE			BIT(0)
>>>>    #define XGMAC_DMA_ECC_INT_STATUS	0x0000306c
>>>> +#define XGMAC_DMA_DPP_INT_STATUS	0x00003074
>>>>    #define XGMAC_DMA_CH_CONTROL(x)		(0x00003100 + (0x80 * (x)))
>>>>    #define XGMAC_SPH			BIT(24)
>>>>    #define XGMAC_PBLx8			BIT(16)
>>>> diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
>>>> index eb48211d9b0e..04d7c4dc2e35 100644
>>>> --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
>>>> +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
>>>> @@ -830,6 +830,43 @@ static const struct dwxgmac3_error_desc dwxgmac3_dma_errors[32]= {
>>>>    	{ false, "UNKNOWN", "Unknown Error" }, /* 31 */
>>>>    };
>>>> +static const char * const dpp_rx_err = "Read Rx Descriptor Parity checker Error";
>>>> +static const char * const dpp_tx_err = "Read Tx Descriptor Parity checker Error";
>>>> +static const struct dwxgmac3_error_desc dwxgmac3_dma_dpp_errors[32] = {
>>>> +	{ true, "TDPES0", dpp_tx_err },
>>>> +	{ true, "TDPES1", dpp_tx_err },
>>>> +	{ true, "TDPES2", dpp_tx_err },
>>>> +	{ true, "TDPES3", dpp_tx_err },
>>>> +	{ true, "TDPES4", dpp_tx_err },
>>>> +	{ true, "TDPES5", dpp_tx_err },
>>>> +	{ true, "TDPES6", dpp_tx_err },
>>>> +	{ true, "TDPES7", dpp_tx_err },
>>>> +	{ true, "TDPES8", dpp_tx_err },
>>>> +	{ true, "TDPES9", dpp_tx_err },
>>>> +	{ true, "TDPES10", dpp_tx_err },
>>>> +	{ true, "TDPES11", dpp_tx_err },
>>>> +	{ true, "TDPES12", dpp_tx_err },
>>>> +	{ true, "TDPES13", dpp_tx_err },
>>>> +	{ true, "TDPES14", dpp_tx_err },
>>>> +	{ true, "TDPES15", dpp_tx_err },
>>>> +	{ true, "RDPES0", dpp_rx_err },
>>>> +	{ true, "RDPES1", dpp_rx_err },
>>>> +	{ true, "RDPES2", dpp_rx_err },
>>>> +	{ true, "RDPES3", dpp_rx_err },
>>>> +	{ true, "RDPES4", dpp_rx_err },
>>>> +	{ true, "RDPES5", dpp_rx_err },
>>>> +	{ true, "RDPES6", dpp_rx_err },
>>>> +	{ true, "RDPES7", dpp_rx_err },
>>>> +	{ true, "RDPES8", dpp_rx_err },
>>>> +	{ true, "RDPES9", dpp_rx_err },
>>>> +	{ true, "RDPES10", dpp_rx_err },
>>>> +	{ true, "RDPES11", dpp_rx_err },
>>>> +	{ true, "RDPES12", dpp_rx_err },
>>>> +	{ true, "RDPES13", dpp_rx_err },
>>>> +	{ true, "RDPES14", dpp_rx_err },
>>>> +	{ true, "RDPES15", dpp_rx_err },
>>>> +};
>>>> +
>>>>    static void dwxgmac3_handle_dma_err(struct net_device *ndev,
>>>>    				    void __iomem *ioaddr, bool correctable,
>>>>    				    struct stmmac_safety_stats *stats)
>>>> @@ -841,6 +878,13 @@ static void dwxgmac3_handle_dma_err(struct net_device *ndev,
>>>>    	dwxgmac3_log_error(ndev, value, correctable, "DMA",
>>>>    			   dwxgmac3_dma_errors, STAT_OFF(dma_errors), stats);
>>>> +
>>>> +	value = readl(ioaddr + XGMAC_DMA_DPP_INT_STATUS);
>>>> +	writel(value, ioaddr + XGMAC_DMA_DPP_INT_STATUS);
>>>> +
>>>> +	dwxgmac3_log_error(ndev, value, false, "DMA_DPP",
>>>> +			   dwxgmac3_dma_dpp_errors,
>>>> +			   STAT_OFF(dma_dpp_errors), stats);
>>>>    }
>>>>    static int
>>>> @@ -881,6 +925,12 @@ dwxgmac3_safety_feat_config(void __iomem *ioaddr, unsigned int asp,
>>>>    	value |= XGMAC_TMOUTEN; /* FSM Timeout Feature */
>>>>    	writel(value, ioaddr + XGMAC_MAC_FSM_CONTROL);
>>>> +	/* 5. Enable Data Path Parity Protection */
>>>> +	value = readl(ioaddr + XGMAC_MTL_DPP_CONTROL);
>>>> +	/* already enabled by default, explicit enable it again */
>>>> +	value &= ~XGMAC_DPP_DISABLE;
>>>> +	writel(value, ioaddr + XGMAC_MTL_DPP_CONTROL);
>>>> +
>>>>    	return 0;
>>>>    }
>>>> @@ -914,7 +964,11 @@ static int dwxgmac3_safety_feat_irq_status(struct net_device *ndev,
>>>>    		ret |= !corr;
>>>>    	}
>>>> -	err = dma & (XGMAC_DEUIS | XGMAC_DECIS);
>>>> +	/* DMA_DPP_Interrupt_Status is indicated by MCSIS bit in
>>>> +	 * DMA_Safety_Interrupt_Status, so we handle DMA Data Path
>>>> +	 * Parity Errors here
>>>> +	 */
>>>> +	err = dma & (XGMAC_DEUIS | XGMAC_DECIS | XGMAC_MCSIS);
>>>>    	corr = dma & XGMAC_DECIS;
>>>>    	if (err) {
>>>>    		dwxgmac3_handle_dma_err(ndev, ioaddr, corr, stats);
>>>> @@ -930,6 +984,7 @@ static const struct dwxgmac3_error {
>>>>    	{ dwxgmac3_mac_errors },
>>>>    	{ dwxgmac3_mtl_errors },
>>>>    	{ dwxgmac3_dma_errors },
>>>> +	{ dwxgmac3_dma_dpp_errors },
>>>>    };
>>>>    static int dwxgmac3_safety_feat_dump(struct stmmac_safety_stats *stats,
>>>
>>>
>>> This change is breaking the build on some of our builders that are still using GCC 6.x ...
>>>
>>> drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c:836:20: error: initialiser element is not constant
>>>    { true, "TDPES0", dpp_tx_err },
>>>                      ^~~~~~~~~~
>>> drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c:836:20: note: (near initialisation for ‘dwxgmac3_dma_dpp_errors[0].detailed_desc’)
>>> drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c:837:20: error: initialiser element is not constant
>>>    { true, "TDPES1", dpp_tx_err },
>>>                      ^~~~~~~~~~
>>> drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c:837:20: note: (near initialisation for ‘dwxgmac3_dma_dpp_errors[1].detailed_desc’)
>>> ...
>>>
>>> I know that this is quite old but the minimum supported by the kernel is v5.1 ...
>>>
>>> https://www.kernel.org/doc/html/next/process/changes.html
>>
>> Thanks Jon,
>>
>> I separately received a notification about this occurring with gcc 7.
>>
>> https://lore.kernel.org/oe-kbuild-all/202402081135.lAxxBXHk-lkp@intel.com/
>>
>> It is unclear to me why this occurs, as dpp_tx_err and dpp_tx_err are const.
>> But I do seem to be able to address this problem by using #defines for
>> these values instead.
>>
>> I plan to post a patch shortly.
> 
> Patch posted:
> - [PATCH net] net: stmmac: xgmac: use #define for string constants
>    https://lore.kernel.org/netdev/20240208-xgmac-const-v1-1-e69a1eeabfc8@kernel.org/
> 


Thanks for fixing! Works for me.

Jon
  

Patch

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 721c1f8e892f..b4f60ab078d6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -216,6 +216,7 @@  struct stmmac_safety_stats {
 	unsigned long mac_errors[32];
 	unsigned long mtl_errors[32];
 	unsigned long dma_errors[32];
+	unsigned long dma_dpp_errors[32];
 };
 
 /* Number of fields in Safety Stats */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
index 207ff1799f2c..5c67a3f89f08 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
@@ -303,6 +303,8 @@ 
 #define XGMAC_RXCEIE			BIT(4)
 #define XGMAC_TXCEIE			BIT(0)
 #define XGMAC_MTL_ECC_INT_STATUS	0x000010cc
+#define XGMAC_MTL_DPP_CONTROL		0x000010e0
+#define XGMAC_DPP_DISABLE		BIT(0)
 #define XGMAC_MTL_TXQ_OPMODE(x)		(0x00001100 + (0x80 * (x)))
 #define XGMAC_TQS			GENMASK(25, 16)
 #define XGMAC_TQS_SHIFT			16
@@ -385,6 +387,7 @@ 
 #define XGMAC_DCEIE			BIT(1)
 #define XGMAC_TCEIE			BIT(0)
 #define XGMAC_DMA_ECC_INT_STATUS	0x0000306c
+#define XGMAC_DMA_DPP_INT_STATUS	0x00003074
 #define XGMAC_DMA_CH_CONTROL(x)		(0x00003100 + (0x80 * (x)))
 #define XGMAC_SPH			BIT(24)
 #define XGMAC_PBLx8			BIT(16)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index eb48211d9b0e..04d7c4dc2e35 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -830,6 +830,43 @@  static const struct dwxgmac3_error_desc dwxgmac3_dma_errors[32]= {
 	{ false, "UNKNOWN", "Unknown Error" }, /* 31 */
 };
 
+static const char * const dpp_rx_err = "Read Rx Descriptor Parity checker Error";
+static const char * const dpp_tx_err = "Read Tx Descriptor Parity checker Error";
+static const struct dwxgmac3_error_desc dwxgmac3_dma_dpp_errors[32] = {
+	{ true, "TDPES0", dpp_tx_err },
+	{ true, "TDPES1", dpp_tx_err },
+	{ true, "TDPES2", dpp_tx_err },
+	{ true, "TDPES3", dpp_tx_err },
+	{ true, "TDPES4", dpp_tx_err },
+	{ true, "TDPES5", dpp_tx_err },
+	{ true, "TDPES6", dpp_tx_err },
+	{ true, "TDPES7", dpp_tx_err },
+	{ true, "TDPES8", dpp_tx_err },
+	{ true, "TDPES9", dpp_tx_err },
+	{ true, "TDPES10", dpp_tx_err },
+	{ true, "TDPES11", dpp_tx_err },
+	{ true, "TDPES12", dpp_tx_err },
+	{ true, "TDPES13", dpp_tx_err },
+	{ true, "TDPES14", dpp_tx_err },
+	{ true, "TDPES15", dpp_tx_err },
+	{ true, "RDPES0", dpp_rx_err },
+	{ true, "RDPES1", dpp_rx_err },
+	{ true, "RDPES2", dpp_rx_err },
+	{ true, "RDPES3", dpp_rx_err },
+	{ true, "RDPES4", dpp_rx_err },
+	{ true, "RDPES5", dpp_rx_err },
+	{ true, "RDPES6", dpp_rx_err },
+	{ true, "RDPES7", dpp_rx_err },
+	{ true, "RDPES8", dpp_rx_err },
+	{ true, "RDPES9", dpp_rx_err },
+	{ true, "RDPES10", dpp_rx_err },
+	{ true, "RDPES11", dpp_rx_err },
+	{ true, "RDPES12", dpp_rx_err },
+	{ true, "RDPES13", dpp_rx_err },
+	{ true, "RDPES14", dpp_rx_err },
+	{ true, "RDPES15", dpp_rx_err },
+};
+
 static void dwxgmac3_handle_dma_err(struct net_device *ndev,
 				    void __iomem *ioaddr, bool correctable,
 				    struct stmmac_safety_stats *stats)
@@ -841,6 +878,13 @@  static void dwxgmac3_handle_dma_err(struct net_device *ndev,
 
 	dwxgmac3_log_error(ndev, value, correctable, "DMA",
 			   dwxgmac3_dma_errors, STAT_OFF(dma_errors), stats);
+
+	value = readl(ioaddr + XGMAC_DMA_DPP_INT_STATUS);
+	writel(value, ioaddr + XGMAC_DMA_DPP_INT_STATUS);
+
+	dwxgmac3_log_error(ndev, value, false, "DMA_DPP",
+			   dwxgmac3_dma_dpp_errors,
+			   STAT_OFF(dma_dpp_errors), stats);
 }
 
 static int
@@ -881,6 +925,12 @@  dwxgmac3_safety_feat_config(void __iomem *ioaddr, unsigned int asp,
 	value |= XGMAC_TMOUTEN; /* FSM Timeout Feature */
 	writel(value, ioaddr + XGMAC_MAC_FSM_CONTROL);
 
+	/* 5. Enable Data Path Parity Protection */
+	value = readl(ioaddr + XGMAC_MTL_DPP_CONTROL);
+	/* already enabled by default, explicit enable it again */
+	value &= ~XGMAC_DPP_DISABLE;
+	writel(value, ioaddr + XGMAC_MTL_DPP_CONTROL);
+
 	return 0;
 }
 
@@ -914,7 +964,11 @@  static int dwxgmac3_safety_feat_irq_status(struct net_device *ndev,
 		ret |= !corr;
 	}
 
-	err = dma & (XGMAC_DEUIS | XGMAC_DECIS);
+	/* DMA_DPP_Interrupt_Status is indicated by MCSIS bit in
+	 * DMA_Safety_Interrupt_Status, so we handle DMA Data Path
+	 * Parity Errors here
+	 */
+	err = dma & (XGMAC_DEUIS | XGMAC_DECIS | XGMAC_MCSIS);
 	corr = dma & XGMAC_DECIS;
 	if (err) {
 		dwxgmac3_handle_dma_err(ndev, ioaddr, corr, stats);
@@ -930,6 +984,7 @@  static const struct dwxgmac3_error {
 	{ dwxgmac3_mac_errors },
 	{ dwxgmac3_mtl_errors },
 	{ dwxgmac3_dma_errors },
+	{ dwxgmac3_dma_dpp_errors },
 };
 
 static int dwxgmac3_safety_feat_dump(struct stmmac_safety_stats *stats,