[v2,1/3] genirq: Use hlist for managing resend handlers
Commit Message
The current implementation utilizes a bitmap for managing IRQ resend
handlers, which is allocated based on the SPARSE_IRQ/NR_IRQS macros.
However, this method may not efficiently utilize memory during runtime,
particularly when IRQ_BITMAP_BITS is large.
This proposed patch aims to address this issue by using hlist to manage
IRQ resend handlers instead of relying on static memory allocation.
Additionally, a new function, clear_irq_resend(), is introduced and
called from irq_shutdown to ensure a graceful teardown of IRQD.
Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
---
include/linux/irqdesc.h | 3 +++
kernel/irq/chip.c | 1 +
kernel/irq/internals.h | 1 +
kernel/irq/irqdesc.c | 6 ++++++
kernel/irq/resend.c | 41 ++++++++++++++++++++++++-----------------
5 files changed, 35 insertions(+), 17 deletions(-)
Comments
Hi Marc,
On 4/9/23 04:10, Marc Zyngier wrote:
> External email: Use caution opening links or attachments
>
>
> On Sat, 08 Apr 2023 18:15:24 +0100,
> Shanker Donthineni <sdonthineni@nvidia.com> wrote:
>>
>> The current implementation utilizes a bitmap for managing IRQ resend
>> handlers, which is allocated based on the SPARSE_IRQ/NR_IRQS macros.
>> However, this method may not efficiently utilize memory during runtime,
>> particularly when IRQ_BITMAP_BITS is large.
>>
>> This proposed patch aims to address this issue by using hlist to manage
>
> s/This proposed patch aims to//
>
>> IRQ resend handlers instead of relying on static memory allocation.
>> Additionally, a new function, clear_irq_resend(), is introduced and
>> called from irq_shutdown to ensure a graceful teardown of IRQD.
>
> s/IRQD/the interrupt/
>
Ack
>>
>> Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
>> ---
>> include/linux/irqdesc.h | 3 +++
>> kernel/irq/chip.c | 1 +
>> kernel/irq/internals.h | 1 +
>> kernel/irq/irqdesc.c | 6 ++++++
>> kernel/irq/resend.c | 41 ++++++++++++++++++++++++-----------------
>> 5 files changed, 35 insertions(+), 17 deletions(-)
>>
>> diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
>> index 844a8e30e6de..d9451d456a73 100644
>> --- a/include/linux/irqdesc.h
>> +++ b/include/linux/irqdesc.h
>> @@ -102,6 +102,9 @@ struct irq_desc {
>> int parent_irq;
>> struct module *owner;
>> const char *name;
>> +#ifdef CONFIG_HARDIRQS_SW_RESEND
>> + struct hlist_node resend_node;
>> +#endif
>> } ____cacheline_internodealigned_in_smp;
>>
>> #ifdef CONFIG_SPARSE_IRQ
>> diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
>> index 49e7bc871fec..2eac5532c3c8 100644
>> --- a/kernel/irq/chip.c
>> +++ b/kernel/irq/chip.c
>> @@ -306,6 +306,7 @@ static void __irq_disable(struct irq_desc *desc, bool mask);
>> void irq_shutdown(struct irq_desc *desc)
>> {
>> if (irqd_is_started(&desc->irq_data)) {
>> + clear_irq_resend(desc);
>> desc->depth = 1;
>> if (desc->irq_data.chip->irq_shutdown) {
>> desc->irq_data.chip->irq_shutdown(&desc->irq_data);
>> diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
>> index 5fdc0b557579..2fd17057ed4b 100644
>> --- a/kernel/irq/internals.h
>> +++ b/kernel/irq/internals.h
>> @@ -113,6 +113,7 @@ irqreturn_t handle_irq_event(struct irq_desc *desc);
>>
>> /* Resending of interrupts :*/
>> int check_irq_resend(struct irq_desc *desc, bool inject);
>> +void clear_irq_resend(struct irq_desc *desc);
>> bool irq_wait_for_poll(struct irq_desc *desc);
>> void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action);
>>
>> diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
>> index 240e145e969f..47543b5a0edb 100644
>> --- a/kernel/irq/irqdesc.c
>> +++ b/kernel/irq/irqdesc.c
>> @@ -415,6 +415,9 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags,
>> desc_set_defaults(irq, desc, node, affinity, owner);
>> irqd_set(&desc->irq_data, flags);
>> kobject_init(&desc->kobj, &irq_kobj_type);
>> +#ifdef CONFIG_HARDIRQS_SW_RESEND
>> + INIT_HLIST_NODE(&desc->resend_node);
>> +#endif
>
> Please add a helper that hides the #ifdefery from the core code, and
> use it in both spots.
> Sure, I will add a helper function irq_resend_init()
>>
>> return desc;
>>
>> @@ -581,6 +584,9 @@ int __init early_irq_init(void)
>> mutex_init(&desc[i].request_mutex);
>> init_waitqueue_head(&desc[i].wait_for_threads);
>> desc_set_defaults(i, &desc[i], node, NULL, NULL);
>> +#ifdef CONFIG_HARDIRQS_SW_RESEND
>> + INIT_HLIST_NODE(&desc->resend_node);
>> +#endif
>> }
>> return arch_early_irq_init();
>> }
>> diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
>> index 0c46e9fe3a89..d3db2628a720 100644
>> --- a/kernel/irq/resend.c
>> +++ b/kernel/irq/resend.c
>> @@ -21,8 +21,9 @@
>>
>> #ifdef CONFIG_HARDIRQS_SW_RESEND
>>
>> -/* Bitmap to handle software resend of interrupts: */
>> -static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS);
>> +/* hlist_head to handle software resend of interrupts: */
>> +static HLIST_HEAD(irq_resend_list);
>> +static DEFINE_RAW_SPINLOCK(irq_resend_lock);
>>
>> /*
>> * Run software resends of IRQ's
>> @@ -30,18 +31,17 @@ static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS);
>> static void resend_irqs(struct tasklet_struct *unused)
>> {
>> struct irq_desc *desc;
>> - int irq;
>> -
>> - while (!bitmap_empty(irqs_resend, nr_irqs)) {
>> - irq = find_first_bit(irqs_resend, nr_irqs);
>> - clear_bit(irq, irqs_resend);
>> - desc = irq_to_desc(irq);
>> - if (!desc)
>> - continue;
>> - local_irq_disable();
>> +
>> + raw_spin_lock_irq(&irq_resend_lock);
>> + while (!hlist_empty(&irq_resend_list)) {
>> + desc = hlist_entry(irq_resend_list.first, struct irq_desc,
>> + resend_node);
>> + hlist_del_init(&desc->resend_node);
>> + raw_spin_unlock(&irq_resend_lock);
>> desc->handle_irq(desc);
>> - local_irq_enable();
>> + raw_spin_lock(&irq_resend_lock);
>
> What makes it safe to drop the local_irq_*able()?
>
> tasklet_action_common() explicitly enables interrupts when calling the
> callback, so unless there is some other interrupt disabling that I
> can't immediately spot, the handler may run in the wrong context.
>
Unless I am overlooking something, interrupts are disabled within the while
loop unless desc->handle_irq() is enabling it. The existing code disables
and enables interrupts for each handler invocation, whereas the modified
code does it only once for all outstanding handlers.
Please let me know if the below code changes are acceptable. Similar to the
current interrupt disable/enable logic except the protection lock resend.
static void resend_irqs(struct tasklet_struct *unused)
{
struct irq_desc *desc;
- int irq;
-
- while (!bitmap_empty(irqs_resend, nr_irqs)) {
- irq = find_first_bit(irqs_resend, nr_irqs);
- clear_bit(irq, irqs_resend);
- desc = irq_to_desc(irq);
- if (!desc)
- continue;
+
+ raw_spin_lock(&irq_resend_lock);
+ while (!hlist_empty(&irq_resend_list)) {
+ desc = hlist_entry(irq_resend_list.first, struct irq_desc,
+ resend_node);
+ hlist_del_init(&desc->resend_node);
+ raw_spin_unlock(&irq_resend_lock);
+
local_irq_disable();
desc->handle_irq(desc);
local_irq_enable();
+
+ raw_spin_lock(&irq_resend_lock);
}
+ raw_spin_unlock(&irq_resend_lock);
}
> If there is such interrupt disabling, then please point it out in the
> commit message so that idiots like me can refer to it.
>
> M.
>
> --
> Without deviation from the norm, progress is not possible.
On Sun, 09 Apr 2023 13:00:27 +0100,
Shanker Donthineni <sdonthineni@nvidia.com> wrote:
>
> >> @@ -30,18 +31,17 @@ static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS);
> >> static void resend_irqs(struct tasklet_struct *unused)
> >> {
> >> struct irq_desc *desc;
> >> - int irq;
> >> -
> >> - while (!bitmap_empty(irqs_resend, nr_irqs)) {
> >> - irq = find_first_bit(irqs_resend, nr_irqs);
> >> - clear_bit(irq, irqs_resend);
> >> - desc = irq_to_desc(irq);
> >> - if (!desc)
> >> - continue;
> >> - local_irq_disable();
> >> +
> >> + raw_spin_lock_irq(&irq_resend_lock);
> >> + while (!hlist_empty(&irq_resend_list)) {
> >> + desc = hlist_entry(irq_resend_list.first, struct irq_desc,
> >> + resend_node);
> >> + hlist_del_init(&desc->resend_node);
> >> + raw_spin_unlock(&irq_resend_lock);
> >> desc->handle_irq(desc);
> >> - local_irq_enable();
> >> + raw_spin_lock(&irq_resend_lock);
> >
> > What makes it safe to drop the local_irq_*able()?
> >
> > tasklet_action_common() explicitly enables interrupts when calling the
> > callback, so unless there is some other interrupt disabling that I
> > can't immediately spot, the handler may run in the wrong context.
> >
>
> Unless I am overlooking something, interrupts are disabled within the while
> loop unless desc->handle_irq() is enabling it. The existing code disables
> and enables interrupts for each handler invocation, whereas the modified
> code does it only once for all outstanding handlers.
Ah, you use raw_spinlock_irq() outside of the loop. I somehow glanced
over that, apologies for the noise. Unless we expect a really long
list of interrupts to be resent, your current code should be OK.
Thanks,
M.
On 4/10/23 05:05, Marc Zyngier wrote:
> External email: Use caution opening links or attachments
>
>
> On Sun, 09 Apr 2023 13:00:27 +0100,
> Shanker Donthineni <sdonthineni@nvidia.com> wrote:
>>
>>>> @@ -30,18 +31,17 @@ static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS);
>>>> static void resend_irqs(struct tasklet_struct *unused)
>>>> {
>>>> struct irq_desc *desc;
>>>> - int irq;
>>>> -
>>>> - while (!bitmap_empty(irqs_resend, nr_irqs)) {
>>>> - irq = find_first_bit(irqs_resend, nr_irqs);
>>>> - clear_bit(irq, irqs_resend);
>>>> - desc = irq_to_desc(irq);
>>>> - if (!desc)
>>>> - continue;
>>>> - local_irq_disable();
>>>> +
>>>> + raw_spin_lock_irq(&irq_resend_lock);
>>>> + while (!hlist_empty(&irq_resend_list)) {
>>>> + desc = hlist_entry(irq_resend_list.first, struct irq_desc,
>>>> + resend_node);
>>>> + hlist_del_init(&desc->resend_node);
>>>> + raw_spin_unlock(&irq_resend_lock);
>>>> desc->handle_irq(desc);
>>>> - local_irq_enable();
>>>> + raw_spin_lock(&irq_resend_lock);
>>>
>>> What makes it safe to drop the local_irq_*able()?
>>>
>>> tasklet_action_common() explicitly enables interrupts when calling the
>>> callback, so unless there is some other interrupt disabling that I
>>> can't immediately spot, the handler may run in the wrong context.
>>>
>>
>> Unless I am overlooking something, interrupts are disabled within the while
>> loop unless desc->handle_irq() is enabling it. The existing code disables
>> and enables interrupts for each handler invocation, whereas the modified
>> code does it only once for all outstanding handlers.
>
> Ah, you use raw_spinlock_irq() outside of the loop. I somehow glanced
> over that, apologies for the noise. Unless we expect a really long
> list of interrupts to be resent, your current code should be OK.
>
Thanks, I'll post v3 patches to address the other review comments.
@@ -102,6 +102,9 @@ struct irq_desc {
int parent_irq;
struct module *owner;
const char *name;
+#ifdef CONFIG_HARDIRQS_SW_RESEND
+ struct hlist_node resend_node;
+#endif
} ____cacheline_internodealigned_in_smp;
#ifdef CONFIG_SPARSE_IRQ
@@ -306,6 +306,7 @@ static void __irq_disable(struct irq_desc *desc, bool mask);
void irq_shutdown(struct irq_desc *desc)
{
if (irqd_is_started(&desc->irq_data)) {
+ clear_irq_resend(desc);
desc->depth = 1;
if (desc->irq_data.chip->irq_shutdown) {
desc->irq_data.chip->irq_shutdown(&desc->irq_data);
@@ -113,6 +113,7 @@ irqreturn_t handle_irq_event(struct irq_desc *desc);
/* Resending of interrupts :*/
int check_irq_resend(struct irq_desc *desc, bool inject);
+void clear_irq_resend(struct irq_desc *desc);
bool irq_wait_for_poll(struct irq_desc *desc);
void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action);
@@ -415,6 +415,9 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags,
desc_set_defaults(irq, desc, node, affinity, owner);
irqd_set(&desc->irq_data, flags);
kobject_init(&desc->kobj, &irq_kobj_type);
+#ifdef CONFIG_HARDIRQS_SW_RESEND
+ INIT_HLIST_NODE(&desc->resend_node);
+#endif
return desc;
@@ -581,6 +584,9 @@ int __init early_irq_init(void)
mutex_init(&desc[i].request_mutex);
init_waitqueue_head(&desc[i].wait_for_threads);
desc_set_defaults(i, &desc[i], node, NULL, NULL);
+#ifdef CONFIG_HARDIRQS_SW_RESEND
+ INIT_HLIST_NODE(&desc->resend_node);
+#endif
}
return arch_early_irq_init();
}
@@ -21,8 +21,9 @@
#ifdef CONFIG_HARDIRQS_SW_RESEND
-/* Bitmap to handle software resend of interrupts: */
-static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS);
+/* hlist_head to handle software resend of interrupts: */
+static HLIST_HEAD(irq_resend_list);
+static DEFINE_RAW_SPINLOCK(irq_resend_lock);
/*
* Run software resends of IRQ's
@@ -30,18 +31,17 @@ static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS);
static void resend_irqs(struct tasklet_struct *unused)
{
struct irq_desc *desc;
- int irq;
-
- while (!bitmap_empty(irqs_resend, nr_irqs)) {
- irq = find_first_bit(irqs_resend, nr_irqs);
- clear_bit(irq, irqs_resend);
- desc = irq_to_desc(irq);
- if (!desc)
- continue;
- local_irq_disable();
+
+ raw_spin_lock_irq(&irq_resend_lock);
+ while (!hlist_empty(&irq_resend_list)) {
+ desc = hlist_entry(irq_resend_list.first, struct irq_desc,
+ resend_node);
+ hlist_del_init(&desc->resend_node);
+ raw_spin_unlock(&irq_resend_lock);
desc->handle_irq(desc);
- local_irq_enable();
+ raw_spin_lock(&irq_resend_lock);
}
+ raw_spin_unlock_irq(&irq_resend_lock);
}
/* Tasklet to handle resend: */
@@ -49,8 +49,6 @@ static DECLARE_TASKLET(resend_tasklet, resend_irqs);
static int irq_sw_resend(struct irq_desc *desc)
{
- unsigned int irq = irq_desc_get_irq(desc);
-
/*
* Validate whether this interrupt can be safely injected from
* non interrupt context
@@ -70,16 +68,25 @@ static int irq_sw_resend(struct irq_desc *desc)
*/
if (!desc->parent_irq)
return -EINVAL;
- irq = desc->parent_irq;
}
- /* Set it pending and activate the softirq: */
- set_bit(irq, irqs_resend);
+ /* Add to resend_list and activate the softirq: */
+ raw_spin_lock(&irq_resend_lock);
+ hlist_add_head(&desc->resend_node, &irq_resend_list);
+ raw_spin_unlock(&irq_resend_lock);
tasklet_schedule(&resend_tasklet);
return 0;
}
+void clear_irq_resend(struct irq_desc *desc)
+{
+ raw_spin_lock(&irq_resend_lock);
+ hlist_del_init(&desc->resend_node);
+ raw_spin_unlock(&irq_resend_lock);
+}
#else
+void clear_irq_resend(struct irq_desc *desc) {}
+
static int irq_sw_resend(struct irq_desc *desc)
{
return -EINVAL;