irqchip/gic-v3: Workaround for GIC-700 erratum 2941627
Commit Message
GIC-700 erratum 2941627 may cause the GIC-700 to miss SPI wake
requests when SPIs are deactivated while targeting a
sleeping CPU - i.e. a CPU for which the redistributor has:
GICR_WAKER.ProcessorSleep == 1
This runtime situation can happen if an SPI that has been
activated on a core is retargeted to a different core, it
becomes pending and the target core subsequently enters a
power state quiescing the respective redistributor.
When this situation is hit, the de-activation carried out
on the core that activated the SPI (through either ICC_EOIR1_EL1
or ICC_DIR_EL1 register writes) does not trigger a wake
request for the sleeping GIC redistributor even if the SPI
is pending.
Fix the erratum by de-activating the SPI using the
distributor GICD_ICACTIVER register if the runtime
conditions require it (ie the IRQ was retargeted between
activation and de-activation).
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Cc: Marc Zyngier <maz@kernel.org>
---
Documentation/arm64/silicon-errata.rst | 3 ++
drivers/irqchip/irq-gic-v3.c | 71 +++++++++++++++++++++++++-
2 files changed, 73 insertions(+), 1 deletion(-)
Comments
Lorenzo,
On Tue, 04 Jul 2023 13:34:36 +0100,
Lorenzo Pieralisi <lpieralisi@kernel.org> wrote:
>
> GIC700 erratum 2941627 may cause GIC-700 missing SPIs wake
> requests when SPIs are deactivated while targeting a
> sleeping CPU - ie a CPU for which the redistributor:
>
> GICR_WAKER.ProcessorSleep == 1
>
> This runtime situation can happen if an SPI that has been
> activated on a core is retargeted to a different core, it
> becomes pending and the target core subsequently enters a
> power state quiescing the respective redistributor.
>
> When this situation is hit, the de-activation carried out
> on the core that activated the SPI (through either ICC_EOIR1_EL1
> or ICC_DIR_EL1 register writes) does not trigger a wake
> requests for the sleeping GIC redistributor even if the SPI
> is pending.
>
> Fix the erratum by de-activating the SPI using the
s/Fix/ Work around/
> redistributor GICD_ICACTIVER register if the runtime
> conditions require it (ie the IRQ was retargeted between
> activation and de-activation).
>
> Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
> Cc: Marc Zyngier <maz@kernel.org>
> ---
> Documentation/arm64/silicon-errata.rst | 3 ++
> drivers/irqchip/irq-gic-v3.c | 71 +++++++++++++++++++++++++-
> 2 files changed, 73 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
> index 9e311bc43e05..e77c57a0adf8 100644
> --- a/Documentation/arm64/silicon-errata.rst
> +++ b/Documentation/arm64/silicon-errata.rst
> @@ -141,6 +141,9 @@ stable kernels.
> | ARM | MMU-500 | #841119,826419 | N/A |
> +----------------+-----------------+-----------------+-----------------------------+
> +----------------+-----------------+-----------------+-----------------------------+
> +| ARM | GIC-700 | #2941627 | ARM64_ERRATUM_2941627 |
> ++----------------+-----------------+-----------------+-----------------------------+
> ++----------------+-----------------+-----------------+-----------------------------+
> | Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_845719 |
> +----------------+-----------------+-----------------+-----------------------------+
> | Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_843419 |
> diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
> index a605aa79435a..a0a9ccf23742 100644
> --- a/drivers/irqchip/irq-gic-v3.c
> +++ b/drivers/irqchip/irq-gic-v3.c
> @@ -68,6 +68,8 @@ struct gic_chip_data {
> static void __iomem *t241_dist_base_alias[T241_CHIPS_MAX] __read_mostly;
> static DEFINE_STATIC_KEY_FALSE(gic_nvidia_t241_erratum);
>
> +static DEFINE_STATIC_KEY_FALSE(gic_arm64_2941627_erratum);
> +
> static struct gic_chip_data gic_data __read_mostly;
> static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key);
>
> @@ -591,10 +593,35 @@ static void gic_irq_nmi_teardown(struct irq_data *d)
> gic_irq_set_prio(d, GICD_INT_DEF_PRI);
> }
>
> +static bool gic_arm64_erratum_2941627_needed(struct irq_data *d)
> +{
> + if (!static_branch_unlikely(&gic_arm64_2941627_erratum))
> + return false;
> +
> + /*
> + * The workaround is needed if the IRQ is an SPI and
> + * the target cpu is different from the one we are
> + * executing on.
> + */
> + return !((gic_irq_in_rdist(d)) || gic_irq(d) >= 8192 ||
> + cpumask_equal(irq_data_get_effective_affinity_mask(d),
> + cpumask_of(smp_processor_id())));
I dislike this statement for multiple reasons:
- it is written as a negation, making it harder than strictly
necessary to parse as it is the opposite of the comment above
- gic_irq_in_rdist() and gic_irq(d) >= 8192 are two ways of checking
the interrupt range -- maybe we should just do that
- cpumask_equal() is *slow* if you have more than 64 CPUs, something
that is increasingly common -- a better option would be to check
whether the current CPU is in the mask or not, which would be enough
as we only have a single affinity bit set
- smp_processor_id() can check for preemption, which is pointless
here, as we're doing things under the irq_desc raw spinlock.
I would expect something like:
enum gic_intid_range range = get_intid_range(d);
return (range == SGI_RANGE || range == ESPI_RANGE) &&
!cpumask_test_cpu(raw_smp_processor_id(),
irq_data_get_effective_affinity_mask(d));
> +}
> +
> static void gic_eoi_irq(struct irq_data *d)
> {
> write_gicreg(gic_irq(d), ICC_EOIR1_EL1);
> isb();
> +
> + if (gic_arm64_erratum_2941627_needed(d)) {
> + /*
> + * Make sure the GIC stream deactivate packet
> + * issued by ICC_EOIR1_EL1 has completed before
> + * deactivating through GICD_IACTIVER.
> + */
> + dsb(sy);
> + gic_poke_irq(d, GICD_ICACTIVER);
> + }
> }
>
> static void gic_eoimode1_eoi_irq(struct irq_data *d)
> @@ -605,7 +632,11 @@ static void gic_eoimode1_eoi_irq(struct irq_data *d)
> */
> if (gic_irq(d) >= 8192 || irqd_is_forwarded_to_vcpu(d))
> return;
> - gic_write_dir(gic_irq(d));
> +
> + if (!gic_arm64_erratum_2941627_needed(d))
> + gic_write_dir(gic_irq(d));
> + else
> + gic_poke_irq(d, GICD_ICACTIVER);
> }
>
> static int gic_set_type(struct irq_data *d, unsigned int type)
> @@ -1796,6 +1827,25 @@ static bool gic_enable_quirk_nvidia_t241(void *data)
> return true;
> }
>
> +static bool gic_enable_quirk_arm64_2941627(void *data)
> +{
> + /*
> + * If CPUidle is not enabled the erratum runtime
> + * conditions can't be hit, since that requires:
> + *
> + * - A core entering a deep power state with
> + * the associated GIC redistributor asleep
> + * and an IRQ active and pending targeted at it
> + * - A different core handling the IRQ and
> + * related GIC operations at the same time
> + */
> + if (!IS_ENABLED(CONFIG_CPU_IDLE))
> + return false;
Could this still hit on a system that traps WFI to EL3 and uses this
as a way to enter a low-power mode?
Thanks,
M.
On Tue, Jul 04, 2023 at 03:44:50PM +0100, Marc Zyngier wrote:
> Lorenzo,
>
> On Tue, 04 Jul 2023 13:34:36 +0100,
> Lorenzo Pieralisi <lpieralisi@kernel.org> wrote:
> >
> > GIC700 erratum 2941627 may cause GIC-700 missing SPIs wake
> > requests when SPIs are deactivated while targeting a
> > sleeping CPU - ie a CPU for which the redistributor:
> >
> > GICR_WAKER.ProcessorSleep == 1
> >
> > This runtime situation can happen if an SPI that has been
> > activated on a core is retargeted to a different core, it
> > becomes pending and the target core subsequently enters a
> > power state quiescing the respective redistributor.
> >
> > When this situation is hit, the de-activation carried out
> > on the core that activated the SPI (through either ICC_EOIR1_EL1
> > or ICC_DIR_EL1 register writes) does not trigger a wake
> > requests for the sleeping GIC redistributor even if the SPI
> > is pending.
> >
> > Fix the erratum by de-activating the SPI using the
>
> s/Fix/ Work around/
>
> > redistributor GICD_ICACTIVER register if the runtime
> > conditions require it (ie the IRQ was retargeted between
> > activation and de-activation).
> >
> > Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
> > Cc: Marc Zyngier <maz@kernel.org>
> > ---
> > Documentation/arm64/silicon-errata.rst | 3 ++
> > drivers/irqchip/irq-gic-v3.c | 71 +++++++++++++++++++++++++-
> > 2 files changed, 73 insertions(+), 1 deletion(-)
> >
> > diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
> > index 9e311bc43e05..e77c57a0adf8 100644
> > --- a/Documentation/arm64/silicon-errata.rst
> > +++ b/Documentation/arm64/silicon-errata.rst
> > @@ -141,6 +141,9 @@ stable kernels.
> > | ARM | MMU-500 | #841119,826419 | N/A |
> > +----------------+-----------------+-----------------+-----------------------------+
> > +----------------+-----------------+-----------------+-----------------------------+
> > +| ARM | GIC-700 | #2941627 | ARM64_ERRATUM_2941627 |
> > ++----------------+-----------------+-----------------+-----------------------------+
> > ++----------------+-----------------+-----------------+-----------------------------+
> > | Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_845719 |
> > +----------------+-----------------+-----------------+-----------------------------+
> > | Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_843419 |
> > diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
> > index a605aa79435a..a0a9ccf23742 100644
> > --- a/drivers/irqchip/irq-gic-v3.c
> > +++ b/drivers/irqchip/irq-gic-v3.c
> > @@ -68,6 +68,8 @@ struct gic_chip_data {
> > static void __iomem *t241_dist_base_alias[T241_CHIPS_MAX] __read_mostly;
> > static DEFINE_STATIC_KEY_FALSE(gic_nvidia_t241_erratum);
> >
> > +static DEFINE_STATIC_KEY_FALSE(gic_arm64_2941627_erratum);
> > +
> > static struct gic_chip_data gic_data __read_mostly;
> > static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key);
> >
> > @@ -591,10 +593,35 @@ static void gic_irq_nmi_teardown(struct irq_data *d)
> > gic_irq_set_prio(d, GICD_INT_DEF_PRI);
> > }
> >
> > +static bool gic_arm64_erratum_2941627_needed(struct irq_data *d)
> > +{
> > + if (!static_branch_unlikely(&gic_arm64_2941627_erratum))
> > + return false;
> > +
> > + /*
> > + * The workaround is needed if the IRQ is an SPI and
> > + * the target cpu is different from the one we are
> > + * executing on.
> > + */
> > + return !((gic_irq_in_rdist(d)) || gic_irq(d) >= 8192 ||
> > + cpumask_equal(irq_data_get_effective_affinity_mask(d),
> > + cpumask_of(smp_processor_id())));
>
> I dislike this statement for multiple reasons:
>
> - it is written as a negation, making it harder than strictly
> necessary to parse as it is the opposite of the comment above
Yes, I agree.
> - gic_irq_in_rdist() and gic_irq(d) >= 8192 are two ways of checking
> the interrupt range -- maybe we should just do that
>
> - cpumask_equal() is *slow* if you have more that 64 CPUs, something
> that is increasingly common -- a better option would be to check
> whether the current CPU is in the mask or not, which would be enough
> as we only have a single affinity bit set
>
> - smp_processor_id() can check for preemption, which is pointless
> here, as we're doing things under the irq_desc raw spinlock.
These are valid points and there is no reason why this should not be
rewritten as you suggest below.
> I would expect something like:
>
> enum gic_intid_range range = get_intid_range(d);
>
> return (range == SGI_RANGE || range == ESPI_RANGE) &&
> !cpumask_test_cpu(raw_smp_processor_id(),
> irq_data_get_effective_affinity_mask(d));
It should work (and it is easier to read in the process), thanks.
> > +}
> > +
> > static void gic_eoi_irq(struct irq_data *d)
> > {
> > write_gicreg(gic_irq(d), ICC_EOIR1_EL1);
> > isb();
> > +
> > + if (gic_arm64_erratum_2941627_needed(d)) {
> > + /*
> > + * Make sure the GIC stream deactivate packet
> > + * issued by ICC_EOIR1_EL1 has completed before
> > + * deactivating through GICD_IACTIVER.
> > + */
> > + dsb(sy);
> > + gic_poke_irq(d, GICD_ICACTIVER);
> > + }
> > }
> >
> > static void gic_eoimode1_eoi_irq(struct irq_data *d)
> > @@ -605,7 +632,11 @@ static void gic_eoimode1_eoi_irq(struct irq_data *d)
> > */
> > if (gic_irq(d) >= 8192 || irqd_is_forwarded_to_vcpu(d))
> > return;
> > - gic_write_dir(gic_irq(d));
> > +
> > + if (!gic_arm64_erratum_2941627_needed(d))
> > + gic_write_dir(gic_irq(d));
> > + else
> > + gic_poke_irq(d, GICD_ICACTIVER);
> > }
> >
> > static int gic_set_type(struct irq_data *d, unsigned int type)
> > @@ -1796,6 +1827,25 @@ static bool gic_enable_quirk_nvidia_t241(void *data)
> > return true;
> > }
> >
> > +static bool gic_enable_quirk_arm64_2941627(void *data)
> > +{
> > + /*
> > + * If CPUidle is not enabled the erratum runtime
> > + * conditions can't be hit, since that requires:
> > + *
> > + * - A core entering a deep power state with
> > + * the associated GIC redistributor asleep
> > + * and an IRQ active and pending targeted at it
> > + * - A different core handling the IRQ and
> > + * related GIC operations at the same time
> > + */
> > + if (!IS_ENABLED(CONFIG_CPU_IDLE))
> > + return false;
>
> Could this still hit on a system that traps WFI to EL3 and uses this
> as a way to enter a low-power mode?
That's a valid point, I have not thought about that. If there are set-ups
where this is allowed (and I think it *is* architecturally allowed with
EL3 saving/restoring context and entering a deep power state - if you
asked I suspect you have something concrete in mind :)) this "optimization"
must be removed since we can still hit the bug; that's what I shall do
for v2.
Thanks,
Lorenzo
On Tue, 04 Jul 2023 16:14:03 +0100,
Lorenzo Pieralisi <lpieralisi@kernel.org> wrote:
>
> On Tue, Jul 04, 2023 at 03:44:50PM +0100, Marc Zyngier wrote:
> > Lorenzo,
> >
> > On Tue, 04 Jul 2023 13:34:36 +0100,
> > Lorenzo Pieralisi <lpieralisi@kernel.org> wrote:
> > >
> > > +static bool gic_enable_quirk_arm64_2941627(void *data)
> > > +{
> > > + /*
> > > + * If CPUidle is not enabled the erratum runtime
> > > + * conditions can't be hit, since that requires:
> > > + *
> > > + * - A core entering a deep power state with
> > > + * the associated GIC redistributor asleep
> > > + * and an IRQ active and pending targeted at it
> > > + * - A different core handling the IRQ and
> > > + * related GIC operations at the same time
> > > + */
> > > + if (!IS_ENABLED(CONFIG_CPU_IDLE))
> > > + return false;
> >
> > Could this still hit on a system that traps WFI to EL3 and uses this
> > as a way to enter a low-power mode?
>
> That's a valid point, I have not thought about that. If there are set-ups
> where this is allowed (and I think it *is* architecturally allowed with
> EL3 saving/restoring context and entering a deep power state - if you
> asked I suspect you have something concrete in mind :)) this "optimization"
> must be removed since we can still hit the bug; that's what I shall do
> for v2.
I do not have a concrete example of anyone doing that, but the fact
that it is possible by the letter of the architecture makes me think
that *someone* out there must be inventive enough to do it.
So I'd rather take the safe option and *always* enable the workaround.
And then someone else can rock up and explain why they don't need it.
Thanks,
M.
On Tue, Jul 04, 2023 at 03:44:50PM +0100, Marc Zyngier wrote:
[...]
> > + return !((gic_irq_in_rdist(d)) || gic_irq(d) >= 8192 ||
> > + cpumask_equal(irq_data_get_effective_affinity_mask(d),
> > + cpumask_of(smp_processor_id())));
>
> I dislike this statement for multiple reasons:
>
> - it is written as a negation, making it harder than strictly
> necessary to parse as it is the opposite of the comment above
>
> - gic_irq_in_rdist() and gic_irq(d) >= 8192 are two ways of checking
> the interrupt range -- maybe we should just do that
>
> - cpumask_equal() is *slow* if you have more that 64 CPUs, something
> that is increasingly common -- a better option would be to check
> whether the current CPU is in the mask or not, which would be enough
> as we only have a single affinity bit set
>
> - smp_processor_id() can check for preemption, which is pointless
> here, as we're doing things under the irq_desc raw spinlock.
>
> I would expect something like:
>
> enum gic_intid_range range = get_intid_range(d);
>
> return (range == SGI_RANGE || range == ESPI_RANGE) &&
> !cpumask_test_cpu(raw_smp_processor_id(),
> irq_data_get_effective_affinity_mask(d));
>
s/SGI/SPI - just noticed, for the record.
Thanks,
Lorenzo
On Tue, 04 Jul 2023 16:27:45 +0100,
Lorenzo Pieralisi <lpieralisi@kernel.org> wrote:
>
> On Tue, Jul 04, 2023 at 03:44:50PM +0100, Marc Zyngier wrote:
>
> [...]
>
> > > + return !((gic_irq_in_rdist(d)) || gic_irq(d) >= 8192 ||
> > > + cpumask_equal(irq_data_get_effective_affinity_mask(d),
> > > + cpumask_of(smp_processor_id())));
> >
> > I dislike this statement for multiple reasons:
> >
> > - it is written as a negation, making it harder than strictly
> > necessary to parse as it is the opposite of the comment above
> >
> > - gic_irq_in_rdist() and gic_irq(d) >= 8192 are two ways of checking
> > the interrupt range -- maybe we should just do that
> >
> > - cpumask_equal() is *slow* if you have more that 64 CPUs, something
> > that is increasingly common -- a better option would be to check
> > whether the current CPU is in the mask or not, which would be enough
> > as we only have a single affinity bit set
> >
> > - smp_processor_id() can check for preemption, which is pointless
> > here, as we're doing things under the irq_desc raw spinlock.
> >
> > I would expect something like:
> >
> > enum gic_intid_range range = get_intid_range(d);
> >
> > return (range == SGI_RANGE || range == ESPI_RANGE) &&
> > !cpumask_test_cpu(raw_smp_processor_id(),
> > irq_data_get_effective_affinity_mask(d));
> >
>
> s/SGI/SPI - just noticed, for the records.
Indeed. As you can tell, I didn't really test the damn thing...
Thanks,
M.
@@ -141,6 +141,9 @@ stable kernels.
| ARM | MMU-500 | #841119,826419 | N/A |
+----------------+-----------------+-----------------+-----------------------------+
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | GIC-700 | #2941627 | ARM64_ERRATUM_2941627 |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
| Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_845719 |
+----------------+-----------------+-----------------+-----------------------------+
| Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_843419 |
@@ -68,6 +68,8 @@ struct gic_chip_data {
static void __iomem *t241_dist_base_alias[T241_CHIPS_MAX] __read_mostly;
static DEFINE_STATIC_KEY_FALSE(gic_nvidia_t241_erratum);
+static DEFINE_STATIC_KEY_FALSE(gic_arm64_2941627_erratum);
+
static struct gic_chip_data gic_data __read_mostly;
static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key);
@@ -591,10 +593,35 @@ static void gic_irq_nmi_teardown(struct irq_data *d)
gic_irq_set_prio(d, GICD_INT_DEF_PRI);
}
+static bool gic_arm64_erratum_2941627_needed(struct irq_data *d)
+{
+ if (!static_branch_unlikely(&gic_arm64_2941627_erratum))
+ return false;
+
+ /*
+ * The workaround is needed if the IRQ is an SPI and
+ * the target cpu is different from the one we are
+ * executing on.
+ */
+ return !((gic_irq_in_rdist(d)) || gic_irq(d) >= 8192 ||
+ cpumask_equal(irq_data_get_effective_affinity_mask(d),
+ cpumask_of(smp_processor_id())));
+}
+
static void gic_eoi_irq(struct irq_data *d)
{
write_gicreg(gic_irq(d), ICC_EOIR1_EL1);
isb();
+
+ if (gic_arm64_erratum_2941627_needed(d)) {
+ /*
+ * Make sure the GIC stream deactivate packet
+ * issued by ICC_EOIR1_EL1 has completed before
+ * deactivating through GICD_ICACTIVER.
+ */
+ dsb(sy);
+ gic_poke_irq(d, GICD_ICACTIVER);
+ }
}
static void gic_eoimode1_eoi_irq(struct irq_data *d)
@@ -605,7 +632,11 @@ static void gic_eoimode1_eoi_irq(struct irq_data *d)
*/
if (gic_irq(d) >= 8192 || irqd_is_forwarded_to_vcpu(d))
return;
- gic_write_dir(gic_irq(d));
+
+ if (!gic_arm64_erratum_2941627_needed(d))
+ gic_write_dir(gic_irq(d));
+ else
+ gic_poke_irq(d, GICD_ICACTIVER);
}
static int gic_set_type(struct irq_data *d, unsigned int type)
@@ -1796,6 +1827,25 @@ static bool gic_enable_quirk_nvidia_t241(void *data)
return true;
}
+static bool gic_enable_quirk_arm64_2941627(void *data)
+{
+ /*
+ * If CPUidle is not enabled the erratum runtime
+ * conditions can't be hit, since that requires:
+ *
+ * - A core entering a deep power state with
+ * the associated GIC redistributor asleep
+ * and an IRQ active and pending targeted at it
+ * - A different core handling the IRQ and
+ * related GIC operations at the same time
+ */
+ if (!IS_ENABLED(CONFIG_CPU_IDLE))
+ return false;
+
+ static_branch_enable(&gic_arm64_2941627_erratum);
+ return true;
+}
+
static const struct gic_quirk gic_quirks[] = {
{
.desc = "GICv3: Qualcomm MSM8996 broken firmware",
@@ -1838,6 +1888,25 @@ static const struct gic_quirk gic_quirks[] = {
.mask = 0xffffffff,
.init = gic_enable_quirk_nvidia_t241,
},
+ {
+ /*
+ * GIC-700: 2941627 workaround - IP variant [0,1]
+ *
+ */
+ .desc = "GICv3: ARM64 erratum 2941627",
+ .iidr = 0x0400043b,
+ .mask = 0xff0e0fff,
+ .init = gic_enable_quirk_arm64_2941627,
+ },
+ {
+ /*
+ * GIC-700: 2941627 workaround - IP variant [2]
+ */
+ .desc = "GICv3: ARM64 erratum 2941627",
+ .iidr = 0x0402043b,
+ .mask = 0xff0f0fff,
+ .init = gic_enable_quirk_arm64_2941627,
+ },
{
}
};