[v9,1/7] irqchip/gic-v3: Enable support for SGIs to act as NMIs

Message ID 20230601143109.v9.1.I1223c11c88937bd0cbd9b086d4ef216985797302@changeid
State New
Headers
Series arm64: Add debug IPI for backtraces / kgdb; try to use NMI for it |

Commit Message

Doug Anderson June 1, 2023, 9:31 p.m. UTC
  From: Sumit Garg <sumit.garg@linaro.org>

Add support to handle SGIs as pseudo NMIs. As SGIs or IPIs default to a
special flow handler: handle_percpu_devid_fasteoi_ipi(), so skip NMI
handler update in case of SGIs.

Also, enable NMI support prior to gic_smp_init() as allocation of SGIs
as IRQs/NMIs happen as part of this routine.

Signed-off-by: Sumit Garg <sumit.garg@linaro.org>
Reviewed-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Tested-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
---

(no changes since v1)

 drivers/irqchip/irq-gic-v3.c | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)
  

Comments

Mark Rutland Aug. 7, 2023, 9:50 a.m. UTC | #1
Hi Doug,

On Thu, Jun 01, 2023 at 02:31:45PM -0700, Douglas Anderson wrote:
> From: Sumit Garg <sumit.garg@linaro.org>
> 
> Add support to handle SGIs as pseudo NMIs. As SGIs or IPIs default to a
> special flow handler: handle_percpu_devid_fasteoi_ipi(), so skip NMI
> handler update in case of SGIs.

I couldn't find handle_percpu_devid_fasteoi_ipi() in mainline, and when
researching I found that we changed that in commit:

  6abbd6988971aaa6 ("irqchip/gic, gic-v3: Make SGIs use handle_percpu_devid_irq()")

... which was in v5.11, so it looks like this is stale?

Since that commit, SGIs are treated the same as PPIs/EPPIs, and use
handle_percpu_devid_irq() by default.

IIUC handle_percpu_devid_irq() isn't NMI safe, and so to run in an NMI context
those should use handle_percpu_devid_fasteoi_nmi().

Marc, does that sound right to you? i.e. SGI NMIs should be handled exactly the
same as PPI NMIs, and use handle_percpu_devid_fasteoi_nmi()?

I have some comments below assuming that SGI NMIs should use
handle_percpu_devid_fasteoi_nmi().

> Also, enable NMI support prior to gic_smp_init() as allocation of SGIs
> as IRQs/NMIs happen as part of this routine.

This bit looks fine to me.

> Signed-off-by: Sumit Garg <sumit.garg@linaro.org>
> Reviewed-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
> Tested-by: Chen-Yu Tsai <wens@csie.org>
> Signed-off-by: Douglas Anderson <dianders@chromium.org>
> ---
> 
> (no changes since v1)
> 
>  drivers/irqchip/irq-gic-v3.c | 29 +++++++++++++++++++++--------
>  1 file changed, 21 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
> index 0c6c1af9a5b7..ed37e02d4c5f 100644
> --- a/drivers/irqchip/irq-gic-v3.c
> +++ b/drivers/irqchip/irq-gic-v3.c
> @@ -525,6 +525,7 @@ static u32 gic_get_ppi_index(struct irq_data *d)
>  static int gic_irq_nmi_setup(struct irq_data *d)
>  {
>  	struct irq_desc *desc = irq_to_desc(d->irq);
> +	u32 idx;
>  
>  	if (!gic_supports_nmi())
>  		return -EINVAL;
> @@ -542,16 +543,22 @@ static int gic_irq_nmi_setup(struct irq_data *d)
>  		return -EINVAL;
>  
>  	/* desc lock should already be held */
> -	if (gic_irq_in_rdist(d)) {
> -		u32 idx = gic_get_ppi_index(d);
> +	switch (get_intid_range(d)) {
> +	case SGI_RANGE:
> +		break;
> +	case PPI_RANGE:
> +	case EPPI_RANGE:
> +		idx = gic_get_ppi_index(d);
>  
>  		/* Setting up PPI as NMI, only switch handler for first NMI */
>  		if (!refcount_inc_not_zero(&ppi_nmi_refs[idx])) {
>  			refcount_set(&ppi_nmi_refs[idx], 1);
>  			desc->handle_irq = handle_percpu_devid_fasteoi_nmi;
>  		}
> -	} else {
> +		break;
> +	default:
>  		desc->handle_irq = handle_fasteoi_nmi;
> +		break;
>  	}

As above, I reckon this isn't right, and we should treat all rdist interrupts
(which are all percpu) the same.

I reckon what we should be doing here is make ppi_nmi_refs cover all of the
rdist interrupts (e.g. make that rdist_nmi_refs, add a gic_get_rdist_idx()
helper), and then here have something like:

	if (gic_irq_in_rdist(d)) {
		u32 idx = gic_get_rdist_idx(d);

		/* 
		 * Setting up a percpu interrupt as NMI, only switch handler
		 * for first NMI
		 */
		if (!refcount_inc_not_zero(&rdist_nmi_refs[idx])) {
			refcount_set(&ppi_nmi_refs[idx], 1);
			desc->handle_irq = handle_percpu_devid_fasteoi_nmi;
		}
	}

... as an aside, it'd be nicer if we could switch the handler at request time,
as then we wouldn't need the refcount at all, but I couldn't see a good irqchip
hook to hang that off, so I don't think that needs to change as a prerequisite.

>  
>  	gic_irq_set_prio(d, GICD_INT_NMI_PRI);
> @@ -562,6 +569,7 @@ static int gic_irq_nmi_setup(struct irq_data *d)
>  static void gic_irq_nmi_teardown(struct irq_data *d)
>  {
>  	struct irq_desc *desc = irq_to_desc(d->irq);
> +	u32 idx;
>  
>  	if (WARN_ON(!gic_supports_nmi()))
>  		return;
> @@ -579,14 +587,20 @@ static void gic_irq_nmi_teardown(struct irq_data *d)
>  		return;
>  
>  	/* desc lock should already be held */
> -	if (gic_irq_in_rdist(d)) {
> -		u32 idx = gic_get_ppi_index(d);
> +	switch (get_intid_range(d)) {
> +	case SGI_RANGE:
> +		break;
> +	case PPI_RANGE:
> +	case EPPI_RANGE:
> +		idx = gic_get_ppi_index(d);
>  
>  		/* Tearing down NMI, only switch handler for last NMI */
>  		if (refcount_dec_and_test(&ppi_nmi_refs[idx]))
>  			desc->handle_irq = handle_percpu_devid_irq;
> -	} else {
> +		break;
> +	default:
>  		desc->handle_irq = handle_fasteoi_irq;
> +		break;
>  	}

Same comments as for gic_irq_nmi_setup() here.

>  
>  	gic_irq_set_prio(d, GICD_INT_DEF_PRI);
> @@ -2001,6 +2015,7 @@ static int __init gic_init_bases(phys_addr_t dist_phys_base,
>  
>  	gic_dist_init();
>  	gic_cpu_init();
> +	gic_enable_nmi_support();
>  	gic_smp_init();
>  	gic_cpu_pm_init();
>  
> @@ -2013,8 +2028,6 @@ static int __init gic_init_bases(phys_addr_t dist_phys_base,
>  			gicv2m_init(handle, gic_data.domain);
>  	}
>  
> -	gic_enable_nmi_support();
> -

This bit looks fine to me.

Thanks,
Mark.
  
Sumit Garg Aug. 7, 2023, 11:22 a.m. UTC | #2
Hi Mark,

On Mon, 7 Aug 2023 at 15:20, Mark Rutland <mark.rutland@arm.com> wrote:
>
> Hi Doug,
>
> On Thu, Jun 01, 2023 at 02:31:45PM -0700, Douglas Anderson wrote:
> > From: Sumit Garg <sumit.garg@linaro.org>
> >
> > Add support to handle SGIs as pseudo NMIs. As SGIs or IPIs default to a
> > special flow handler: handle_percpu_devid_fasteoi_ipi(), so skip NMI
> > handler update in case of SGIs.
>
> I couldn't find handle_percpu_devid_fasteoi_ipi() in mainline, and when
> researching I found that we changed that in commit:
>
>   6abbd6988971aaa6 ("irqchip/gic, gic-v3: Make SGIs use handle_percpu_devid_irq()")
>
> ... which was in v5.11, so it looks like this is stale?

The last time I tested this patchset (v7 [1]) was with kernel
v5.9.0-rc3. So I agree with you that later
handle_percpu_devid_fasteoi_ipi() was removed and the SGI handling
flow almost became identical to PPI.

[1] https://lore.kernel.org/linux-arm-kernel/1604317487-14543-1-git-send-email-sumit.garg@linaro.org/

>
> Since that commit, SGIs are treated the same as PPIs/EPPIs, and use
> handle_percpu_devid_irq() by default.
>
> IIUC handle_percpu_devid_irq() isn't NMI safe, and so to run in an NMI context
> those should use handle_percpu_devid_fasteoi_nmi().

True.

>
> Marc, does that sound right to you? i.e. SGI NMIs should be handled exactly the
> same as PPI NMIs, and use handle_percpu_devid_fasteoi_nmi()?
>
> I have some comments below assuming that SGI NMIs should use
> handle_percpu_devid_fasteoi_nmi().
>

This sounds fine to me.

> > Also, enable NMI support prior to gic_smp_init() as allocation of SGIs
> > as IRQs/NMIs happen as part of this routine.
>
> This bit looks fine to me.
>
> > Signed-off-by: Sumit Garg <sumit.garg@linaro.org>
> > Reviewed-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
> > Tested-by: Chen-Yu Tsai <wens@csie.org>
> > Signed-off-by: Douglas Anderson <dianders@chromium.org>
> > ---
> >
> > (no changes since v1)
> >
> >  drivers/irqchip/irq-gic-v3.c | 29 +++++++++++++++++++++--------
> >  1 file changed, 21 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
> > index 0c6c1af9a5b7..ed37e02d4c5f 100644
> > --- a/drivers/irqchip/irq-gic-v3.c
> > +++ b/drivers/irqchip/irq-gic-v3.c
> > @@ -525,6 +525,7 @@ static u32 gic_get_ppi_index(struct irq_data *d)
> >  static int gic_irq_nmi_setup(struct irq_data *d)
> >  {
> >       struct irq_desc *desc = irq_to_desc(d->irq);
> > +     u32 idx;
> >
> >       if (!gic_supports_nmi())
> >               return -EINVAL;
> > @@ -542,16 +543,22 @@ static int gic_irq_nmi_setup(struct irq_data *d)
> >               return -EINVAL;
> >
> >       /* desc lock should already be held */
> > -     if (gic_irq_in_rdist(d)) {
> > -             u32 idx = gic_get_ppi_index(d);
> > +     switch (get_intid_range(d)) {
> > +     case SGI_RANGE:
> > +             break;
> > +     case PPI_RANGE:
> > +     case EPPI_RANGE:
> > +             idx = gic_get_ppi_index(d);
> >
> >               /* Setting up PPI as NMI, only switch handler for first NMI */
> >               if (!refcount_inc_not_zero(&ppi_nmi_refs[idx])) {
> >                       refcount_set(&ppi_nmi_refs[idx], 1);
> >                       desc->handle_irq = handle_percpu_devid_fasteoi_nmi;
> >               }
> > -     } else {
> > +             break;
> > +     default:
> >               desc->handle_irq = handle_fasteoi_nmi;
> > +             break;
> >       }
>
> As above, I reckon this isn't right, and we should treat all rdist interrupts
> (which are all percpu) the same.
>
> I reckon what we should be doing here is make ppi_nmi_refs cover all of the
> rdist interrupts (e.g. make that rdist_nmi_refs, add a gic_get_rdist_idx()
> helper), and then here have something like:
>
>         if (gic_irq_in_rdist(d)) {
>                 u32 idx = gic_get_rdist_idx(d);
>
>                 /*
>                  * Setting up a percpu interrupt as NMI, only switch handler
>                  * for first NMI
>                  */
>                 if (!refcount_inc_not_zero(&rdist_nmi_refs[idx])) {
>                         refcount_set(&ppi_nmi_refs[idx], 1);
>                         desc->handle_irq = handle_percpu_devid_fasteoi_nmi;
>                 }
>         }

It looks like you missed the else part here as follows for all other
interrupt types:

         } else {
                 desc->handle_irq = handle_fasteoi_nmi;
         }

Apart from that, your logic sounds good to me.

-Sumit

>
> ... as an aside, it'd be nicer if we could switch the handler at request time,
> as then we wouldn't need the refcount at all, but I couldn't see a good irqchip
> hook to hang that off, so I don't think that needs to change as a prerequisite.
>
> >
> >       gic_irq_set_prio(d, GICD_INT_NMI_PRI);
> > @@ -562,6 +569,7 @@ static int gic_irq_nmi_setup(struct irq_data *d)
> >  static void gic_irq_nmi_teardown(struct irq_data *d)
> >  {
> >       struct irq_desc *desc = irq_to_desc(d->irq);
> > +     u32 idx;
> >
> >       if (WARN_ON(!gic_supports_nmi()))
> >               return;
> > @@ -579,14 +587,20 @@ static void gic_irq_nmi_teardown(struct irq_data *d)
> >               return;
> >
> >       /* desc lock should already be held */
> > -     if (gic_irq_in_rdist(d)) {
> > -             u32 idx = gic_get_ppi_index(d);
> > +     switch (get_intid_range(d)) {
> > +     case SGI_RANGE:
> > +             break;
> > +     case PPI_RANGE:
> > +     case EPPI_RANGE:
> > +             idx = gic_get_ppi_index(d);
> >
> >               /* Tearing down NMI, only switch handler for last NMI */
> >               if (refcount_dec_and_test(&ppi_nmi_refs[idx]))
> >                       desc->handle_irq = handle_percpu_devid_irq;
> > -     } else {
> > +             break;
> > +     default:
> >               desc->handle_irq = handle_fasteoi_irq;
> > +             break;
> >       }
>
> Same comments as for gic_irq_nmi_setup() here.
>
> >
> >       gic_irq_set_prio(d, GICD_INT_DEF_PRI);
> > @@ -2001,6 +2015,7 @@ static int __init gic_init_bases(phys_addr_t dist_phys_base,
> >
> >       gic_dist_init();
> >       gic_cpu_init();
> > +     gic_enable_nmi_support();
> >       gic_smp_init();
> >       gic_cpu_pm_init();
> >
> > @@ -2013,8 +2028,6 @@ static int __init gic_init_bases(phys_addr_t dist_phys_base,
> >                       gicv2m_init(handle, gic_data.domain);
> >       }
> >
> > -     gic_enable_nmi_support();
> > -
>
> This bit looks fine to me.
>
> Thanks,
> Mark.
  
Mark Rutland Aug. 7, 2023, 1:25 p.m. UTC | #3
On Mon, Aug 07, 2023 at 04:52:40PM +0530, Sumit Garg wrote:
> On Mon, 7 Aug 2023 at 15:20, Mark Rutland <mark.rutland@arm.com> wrote:
> > On Thu, Jun 01, 2023 at 02:31:45PM -0700, Douglas Anderson wrote:
> > > @@ -542,16 +543,22 @@ static int gic_irq_nmi_setup(struct irq_data *d)
> > >               return -EINVAL;
> > >
> > >       /* desc lock should already be held */
> > > -     if (gic_irq_in_rdist(d)) {
> > > -             u32 idx = gic_get_ppi_index(d);
> > > +     switch (get_intid_range(d)) {
> > > +     case SGI_RANGE:
> > > +             break;
> > > +     case PPI_RANGE:
> > > +     case EPPI_RANGE:
> > > +             idx = gic_get_ppi_index(d);
> > >
> > >               /* Setting up PPI as NMI, only switch handler for first NMI */
> > >               if (!refcount_inc_not_zero(&ppi_nmi_refs[idx])) {
> > >                       refcount_set(&ppi_nmi_refs[idx], 1);
> > >                       desc->handle_irq = handle_percpu_devid_fasteoi_nmi;
> > >               }
> > > -     } else {
> > > +             break;
> > > +     default:
> > >               desc->handle_irq = handle_fasteoi_nmi;
> > > +             break;
> > >       }
> >
> > As above, I reckon this isn't right, and we should treat all rdist interrupts
> > (which are all percpu) the same.
> >
> > I reckon what we should be doing here is make ppi_nmi_refs cover all of the
> > rdist interrupts (e.g. make that rdist_nmi_refs, add a gic_get_rdist_idx()
> > helper), and then here have something like:
> >
> >         if (gic_irq_in_rdist(d)) {
> >                 u32 idx = gic_get_rdist_idx(d);
> >
> >                 /*
> >                  * Setting up a percpu interrupt as NMI, only switch handler
> >                  * for first NMI
> >                  */
> >                 if (!refcount_inc_not_zero(&rdist_nmi_refs[idx])) {
> >                         refcount_set(&ppi_nmi_refs[idx], 1);
> >                         desc->handle_irq = handle_percpu_devid_fasteoi_nmi;
> >                 }
> >         }
> 
> It looks like you missed the else part here as follows for all other
> interrupt types:
> 
>          } else {
>                  desc->handle_irq = handle_fasteoi_nmi;
>          }

Yes, sorry; I had meant for that to be included, i.e.

	if (gic_irq_in_rdist(d)) {
		u32 idx = gic_get_rdist_idx(d);

		/*
		 * Setting up a percpu interrupt as NMI, only switch handler
		 * for first NMI
		 */
		if (!refcount_inc_not_zero(&rdist_nmi_refs[idx])) {
			refcount_set(&ppi_nmi_refs[idx], 1);
			desc->handle_irq = handle_percpu_devid_fasteoi_nmi;
		}
	} else {
		desc->handle_irq = handle_fasteoi_nmi;
	}

> Apart from that, your logic sounds good to me.

Great!

Thanks,
Mark.
  

Patch

diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 0c6c1af9a5b7..ed37e02d4c5f 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -525,6 +525,7 @@  static u32 gic_get_ppi_index(struct irq_data *d)
 static int gic_irq_nmi_setup(struct irq_data *d)
 {
 	struct irq_desc *desc = irq_to_desc(d->irq);
+	u32 idx;
 
 	if (!gic_supports_nmi())
 		return -EINVAL;
@@ -542,16 +543,22 @@  static int gic_irq_nmi_setup(struct irq_data *d)
 		return -EINVAL;
 
 	/* desc lock should already be held */
-	if (gic_irq_in_rdist(d)) {
-		u32 idx = gic_get_ppi_index(d);
+	switch (get_intid_range(d)) {
+	case SGI_RANGE:
+		break;
+	case PPI_RANGE:
+	case EPPI_RANGE:
+		idx = gic_get_ppi_index(d);
 
 		/* Setting up PPI as NMI, only switch handler for first NMI */
 		if (!refcount_inc_not_zero(&ppi_nmi_refs[idx])) {
 			refcount_set(&ppi_nmi_refs[idx], 1);
 			desc->handle_irq = handle_percpu_devid_fasteoi_nmi;
 		}
-	} else {
+		break;
+	default:
 		desc->handle_irq = handle_fasteoi_nmi;
+		break;
 	}
 
 	gic_irq_set_prio(d, GICD_INT_NMI_PRI);
@@ -562,6 +569,7 @@  static int gic_irq_nmi_setup(struct irq_data *d)
 static void gic_irq_nmi_teardown(struct irq_data *d)
 {
 	struct irq_desc *desc = irq_to_desc(d->irq);
+	u32 idx;
 
 	if (WARN_ON(!gic_supports_nmi()))
 		return;
@@ -579,14 +587,20 @@  static void gic_irq_nmi_teardown(struct irq_data *d)
 		return;
 
 	/* desc lock should already be held */
-	if (gic_irq_in_rdist(d)) {
-		u32 idx = gic_get_ppi_index(d);
+	switch (get_intid_range(d)) {
+	case SGI_RANGE:
+		break;
+	case PPI_RANGE:
+	case EPPI_RANGE:
+		idx = gic_get_ppi_index(d);
 
 		/* Tearing down NMI, only switch handler for last NMI */
 		if (refcount_dec_and_test(&ppi_nmi_refs[idx]))
 			desc->handle_irq = handle_percpu_devid_irq;
-	} else {
+		break;
+	default:
 		desc->handle_irq = handle_fasteoi_irq;
+		break;
 	}
 
 	gic_irq_set_prio(d, GICD_INT_DEF_PRI);
@@ -2001,6 +2015,7 @@  static int __init gic_init_bases(phys_addr_t dist_phys_base,
 
 	gic_dist_init();
 	gic_cpu_init();
+	gic_enable_nmi_support();
 	gic_smp_init();
 	gic_cpu_pm_init();
 
@@ -2013,8 +2028,6 @@  static int __init gic_init_bases(phys_addr_t dist_phys_base,
 			gicv2m_init(handle, gic_data.domain);
 	}
 
-	gic_enable_nmi_support();
-
 	return 0;
 
 out_free: