[v5,1/3] genirq: Use hlist for managing resend handlers

Message ID 20230519134902.1495562-2-sdonthineni@nvidia.com
State New
Series Increase the number of IRQ descriptors for SPARSEIRQ

Commit Message

Shanker Donthineni May 19, 2023, 1:49 p.m. UTC
  The current implementation utilizes a bitmap for managing IRQ resend
handlers, which is allocated based on the SPARSE_IRQ/NR_IRQS macros.
However, this method may not efficiently utilize memory during runtime,
particularly when IRQ_BITMAP_BITS is large.

Address this issue by using the hlist to manage IRQ resend handlers
instead of relying on a static bitmap memory allocation. Additionally,
a new function, clear_irq_resend(), is introduced and called from
irq_shutdown to ensure a graceful teardown of the interrupt.

Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
---
 include/linux/irqdesc.h |  3 +++
 kernel/irq/chip.c       |  1 +
 kernel/irq/internals.h  |  2 ++
 kernel/irq/irqdesc.c    |  2 ++
 kernel/irq/resend.c     | 47 ++++++++++++++++++++++++++---------------
 5 files changed, 38 insertions(+), 17 deletions(-)
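
To put a number on the memory argument: the old resend bitmap is sized at
compile time from IRQ_BITMAP_BITS, which kernel/irq/internals.h defines as
NR_IRQS + 8196 when CONFIG_SPARSE_IRQ is set, whether or not any interrupt
ever needs a software resend. The hlist scheme instead costs one embedded
node per allocated descriptor. A minimal sketch of the two costs (the sample
NR_IRQS value is illustrative, not taken from the series):

	/* Old: a static bitmap sized for the compile-time worst case.
	 * With CONFIG_SPARSE_IRQ, IRQ_BITMAP_BITS = NR_IRQS + 8196, so
	 * even NR_IRQS = 512 reserves roughly 1 KiB of BSS, and raising
	 * the descriptor limit (the goal of this series) grows it further.
	 */
	static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS);

	/* New: a single list head plus one hlist_node (two pointers)
	 * embedded in each allocated irq_desc, so the cost scales with
	 * the descriptors that actually exist, not the theoretical max.
	 */
	static HLIST_HEAD(irq_resend_list);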
  

Comments

Liao Chang May 29, 2023, 7:57 a.m. UTC | #1
Hi, Shanker

On 2023/5/19 21:49, Shanker Donthineni wrote:
> The current implementation utilizes a bitmap for managing IRQ resend
> handlers, which is allocated based on the SPARSE_IRQ/NR_IRQS macros.
> However, this method may not efficiently utilize memory during runtime,
> particularly when IRQ_BITMAP_BITS is large.
> 
> Address this issue by using the hlist to manage IRQ resend handlers
> instead of relying on a static bitmap memory allocation. Additionally,
> a new function, clear_irq_resend(), is introduced and called from
> irq_shutdown to ensure a graceful teardown of the interrupt.
> 
> Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
> ---
>  include/linux/irqdesc.h |  3 +++
>  kernel/irq/chip.c       |  1 +
>  kernel/irq/internals.h  |  2 ++
>  kernel/irq/irqdesc.c    |  2 ++
>  kernel/irq/resend.c     | 47 ++++++++++++++++++++++++++---------------
>  5 files changed, 38 insertions(+), 17 deletions(-)
> 
> diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
> index 844a8e30e6de..d9451d456a73 100644
> --- a/include/linux/irqdesc.h
> +++ b/include/linux/irqdesc.h
> @@ -102,6 +102,9 @@ struct irq_desc {
>  	int			parent_irq;
>  	struct module		*owner;
>  	const char		*name;
> +#ifdef CONFIG_HARDIRQS_SW_RESEND
> +	struct hlist_node	resend_node;
> +#endif
>  } ____cacheline_internodealigned_in_smp;

Although there is no documented rule that limits changes to the KABI
struct irq_desc, it is still better to keep the irq_desc definition stable.

>  
>  #ifdef CONFIG_SPARSE_IRQ
> diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
> index 49e7bc871fec..2eac5532c3c8 100644
> --- a/kernel/irq/chip.c
> +++ b/kernel/irq/chip.c
> @@ -306,6 +306,7 @@ static void __irq_disable(struct irq_desc *desc, bool mask);
>  void irq_shutdown(struct irq_desc *desc)
>  {
>  	if (irqd_is_started(&desc->irq_data)) {
> +		clear_irq_resend(desc);
>  		desc->depth = 1;
>  		if (desc->irq_data.chip->irq_shutdown) {
>  			desc->irq_data.chip->irq_shutdown(&desc->irq_data);
> diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
> index 5fdc0b557579..51fc8c497c22 100644
> --- a/kernel/irq/internals.h
> +++ b/kernel/irq/internals.h
> @@ -113,6 +113,8 @@ irqreturn_t handle_irq_event(struct irq_desc *desc);
>  
>  /* Resending of interrupts :*/
>  int check_irq_resend(struct irq_desc *desc, bool inject);
> +void clear_irq_resend(struct irq_desc *desc);
> +void irq_resend_init(struct irq_desc *desc);
>  bool irq_wait_for_poll(struct irq_desc *desc);
>  void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action);
>  
> diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
> index 240e145e969f..b401b89b226a 100644
> --- a/kernel/irq/irqdesc.c
> +++ b/kernel/irq/irqdesc.c
> @@ -415,6 +415,7 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags,
>  	desc_set_defaults(irq, desc, node, affinity, owner);
>  	irqd_set(&desc->irq_data, flags);
>  	kobject_init(&desc->kobj, &irq_kobj_type);
> +	irq_resend_init(desc);
>  
>  	return desc;
>  
> @@ -581,6 +582,7 @@ int __init early_irq_init(void)
>  		mutex_init(&desc[i].request_mutex);
>  		init_waitqueue_head(&desc[i].wait_for_threads);
>  		desc_set_defaults(i, &desc[i], node, NULL, NULL);
> +		irq_resend_init(desc);
>  	}
>  	return arch_early_irq_init();
>  }
> diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
> index 0c46e9fe3a89..edec335c0a7a 100644
> --- a/kernel/irq/resend.c
> +++ b/kernel/irq/resend.c
> @@ -21,8 +21,9 @@
>  
>  #ifdef CONFIG_HARDIRQS_SW_RESEND
>  
> -/* Bitmap to handle software resend of interrupts: */
> -static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS);
> +/* hlist_head to handle software resend of interrupts: */
> +static HLIST_HEAD(irq_resend_list);
> +static DEFINE_RAW_SPINLOCK(irq_resend_lock);

What is the benefit of using hlist here? If you want to enjoy the
low latency of querying elements by key, you must define a hlist table
with a reasonable number of buckets. Otherwise, I don't think the time
complexity of hlist is better than a regular double-linked list, right?

>  
>  /*
>   * Run software resends of IRQ's
> @@ -30,18 +31,17 @@ static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS);
>  static void resend_irqs(struct tasklet_struct *unused)
>  {
>  	struct irq_desc *desc;
> -	int irq;
> -
> -	while (!bitmap_empty(irqs_resend, nr_irqs)) {
> -		irq = find_first_bit(irqs_resend, nr_irqs);
> -		clear_bit(irq, irqs_resend);
> -		desc = irq_to_desc(irq);
> -		if (!desc)
> -			continue;
> -		local_irq_disable();
> +
> +	raw_spin_lock_irq(&irq_resend_lock);
> +	while (!hlist_empty(&irq_resend_list)) {
> +		desc = hlist_entry(irq_resend_list.first, struct irq_desc,
> +				   resend_node);
> +		hlist_del_init(&desc->resend_node);
> +		raw_spin_unlock(&irq_resend_lock);
>  		desc->handle_irq(desc);
> -		local_irq_enable();
> +		raw_spin_lock(&irq_resend_lock);
>  	}
> +	raw_spin_unlock_irq(&irq_resend_lock);
>  }
>  
>  /* Tasklet to handle resend: */
> @@ -49,8 +49,6 @@ static DECLARE_TASKLET(resend_tasklet, resend_irqs);
>  
>  static int irq_sw_resend(struct irq_desc *desc)
>  {
> -	unsigned int irq = irq_desc_get_irq(desc);
> -
>  	/*
>  	 * Validate whether this interrupt can be safely injected from
>  	 * non interrupt context
> @@ -70,16 +68,31 @@ static int irq_sw_resend(struct irq_desc *desc)
>  		 */
>  		if (!desc->parent_irq)
>  			return -EINVAL;
> -		irq = desc->parent_irq;

Why delete this code?

>  	}
>  
> -	/* Set it pending and activate the softirq: */
> -	set_bit(irq, irqs_resend);
> +	/* Add to resend_list and activate the softirq: */
> +	raw_spin_lock(&irq_resend_lock);
> +	hlist_add_head(&desc->resend_node, &irq_resend_list);
> +	raw_spin_unlock(&irq_resend_lock);

Do you consider a situation where irq_sw_resend() is running on two CPUs concurrently?
If so, the same desc could be added into irq_resend_list twice by mistake.

>  	tasklet_schedule(&resend_tasklet);
>  	return 0;
>  }
>  
> +void clear_irq_resend(struct irq_desc *desc)
> +{
> +	raw_spin_lock(&irq_resend_lock);
> +	hlist_del_init(&desc->resend_node);
> +	raw_spin_unlock(&irq_resend_lock);
> +}
> +
> +void irq_resend_init(struct irq_desc *desc)
> +{
> +	INIT_HLIST_NODE(&desc->resend_node);
> +}
>  #else
> +void clear_irq_resend(struct irq_desc *desc) {}
> +void irq_resend_init(struct irq_desc *desc) {}
> +
>  static int irq_sw_resend(struct irq_desc *desc)
>  {
>  	return -EINVAL;
  
Marc Zyngier May 29, 2023, 8:48 a.m. UTC | #2
On Mon, 29 May 2023 08:57:07 +0100,
"Liao, Chang" <liaochang1@huawei.com> wrote:
> 
> Hi, Shanker
> 
> On 2023/5/19 21:49, Shanker Donthineni wrote:
> > The current implementation utilizes a bitmap for managing IRQ resend
> > handlers, which is allocated based on the SPARSE_IRQ/NR_IRQS macros.
> > However, this method may not efficiently utilize memory during runtime,
> > particularly when IRQ_BITMAP_BITS is large.
> > 
> > Address this issue by using the hlist to manage IRQ resend handlers
> > instead of relying on a static bitmap memory allocation. Additionally,
> > a new function, clear_irq_resend(), is introduced and called from
> > irq_shutdown to ensure a graceful teardown of the interrupt.
> > 
> > Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
> > ---
> >  include/linux/irqdesc.h |  3 +++
> >  kernel/irq/chip.c       |  1 +
> >  kernel/irq/internals.h  |  2 ++
> >  kernel/irq/irqdesc.c    |  2 ++
> >  kernel/irq/resend.c     | 47 ++++++++++++++++++++++++++---------------
> >  5 files changed, 38 insertions(+), 17 deletions(-)
> > 
> > diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
> > index 844a8e30e6de..d9451d456a73 100644
> > --- a/include/linux/irqdesc.h
> > +++ b/include/linux/irqdesc.h
> > @@ -102,6 +102,9 @@ struct irq_desc {
> >  	int			parent_irq;
> >  	struct module		*owner;
> >  	const char		*name;
> > +#ifdef CONFIG_HARDIRQS_SW_RESEND
> > +	struct hlist_node	resend_node;
> > +#endif
> >  } ____cacheline_internodealigned_in_smp;
> 
> Although there is no documented rule that limits changes to the KABI
> struct irq_desc, it is still better to keep the irq_desc definition stable.

On what grounds? There is no such thing as a stable in-kernel ABI,
specially for things as internal as irq_desc.

> >  
> >  #ifdef CONFIG_SPARSE_IRQ
> > diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
> > index 49e7bc871fec..2eac5532c3c8 100644
> > --- a/kernel/irq/chip.c
> > +++ b/kernel/irq/chip.c
> > @@ -306,6 +306,7 @@ static void __irq_disable(struct irq_desc *desc, bool mask);
> >  void irq_shutdown(struct irq_desc *desc)
> >  {
> >  	if (irqd_is_started(&desc->irq_data)) {
> > +		clear_irq_resend(desc);
> >  		desc->depth = 1;
> >  		if (desc->irq_data.chip->irq_shutdown) {
> >  			desc->irq_data.chip->irq_shutdown(&desc->irq_data);
> > diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
> > index 5fdc0b557579..51fc8c497c22 100644
> > --- a/kernel/irq/internals.h
> > +++ b/kernel/irq/internals.h
> > @@ -113,6 +113,8 @@ irqreturn_t handle_irq_event(struct irq_desc *desc);
> >  
> >  /* Resending of interrupts :*/
> >  int check_irq_resend(struct irq_desc *desc, bool inject);
> > +void clear_irq_resend(struct irq_desc *desc);
> > +void irq_resend_init(struct irq_desc *desc);
> >  bool irq_wait_for_poll(struct irq_desc *desc);
> >  void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action);
> >  
> > diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
> > index 240e145e969f..b401b89b226a 100644
> > --- a/kernel/irq/irqdesc.c
> > +++ b/kernel/irq/irqdesc.c
> > @@ -415,6 +415,7 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags,
> >  	desc_set_defaults(irq, desc, node, affinity, owner);
> >  	irqd_set(&desc->irq_data, flags);
> >  	kobject_init(&desc->kobj, &irq_kobj_type);
> > +	irq_resend_init(desc);
> >  
> >  	return desc;
> >  
> > @@ -581,6 +582,7 @@ int __init early_irq_init(void)
> >  		mutex_init(&desc[i].request_mutex);
> >  		init_waitqueue_head(&desc[i].wait_for_threads);
> >  		desc_set_defaults(i, &desc[i], node, NULL, NULL);
> > +		irq_resend_init(desc);
> >  	}
> >  	return arch_early_irq_init();
> >  }
> > diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
> > index 0c46e9fe3a89..edec335c0a7a 100644
> > --- a/kernel/irq/resend.c
> > +++ b/kernel/irq/resend.c
> > @@ -21,8 +21,9 @@
> >  
> >  #ifdef CONFIG_HARDIRQS_SW_RESEND
> >  
> > -/* Bitmap to handle software resend of interrupts: */
> > -static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS);
> > +/* hlist_head to handle software resend of interrupts: */
> > +static HLIST_HEAD(irq_resend_list);
> > +static DEFINE_RAW_SPINLOCK(irq_resend_lock);
> 
> What is the benefit of using hlist here? If you want to enjoy the
> low latency of querying elements by key, you must define a hlist table
> with a reasonable number of buckets. Otherwise, I don't think the time
> complexity of hlist is better than a regular double-linked list, right?

You do realise that the list is processed in order, one element after
the other, without ever querying any arbitrary element? Have you read
the code?

>
> >  
> >  /*
> >   * Run software resends of IRQ's
> > @@ -30,18 +31,17 @@ static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS);
> >  static void resend_irqs(struct tasklet_struct *unused)
> >  {
> >  	struct irq_desc *desc;
> > -	int irq;
> > -
> > -	while (!bitmap_empty(irqs_resend, nr_irqs)) {
> > -		irq = find_first_bit(irqs_resend, nr_irqs);
> > -		clear_bit(irq, irqs_resend);
> > -		desc = irq_to_desc(irq);
> > -		if (!desc)
> > -			continue;
> > -		local_irq_disable();
> > +
> > +	raw_spin_lock_irq(&irq_resend_lock);
> > +	while (!hlist_empty(&irq_resend_list)) {
> > +		desc = hlist_entry(irq_resend_list.first, struct irq_desc,
> > +				   resend_node);
> > +		hlist_del_init(&desc->resend_node);
> > +		raw_spin_unlock(&irq_resend_lock);
> >  		desc->handle_irq(desc);
> > -		local_irq_enable();
> > +		raw_spin_lock(&irq_resend_lock);
> >  	}
> > +	raw_spin_unlock_irq(&irq_resend_lock);
> >  }
> >  
> >  /* Tasklet to handle resend: */
> > @@ -49,8 +49,6 @@ static DECLARE_TASKLET(resend_tasklet, resend_irqs);
> >  
> >  static int irq_sw_resend(struct irq_desc *desc)
> >  {
> > -	unsigned int irq = irq_desc_get_irq(desc);
> > -
> >  	/*
> >  	 * Validate whether this interrupt can be safely injected from
> >  	 * non interrupt context
> > @@ -70,16 +68,31 @@ static int irq_sw_resend(struct irq_desc *desc)
> >  		 */
> >  		if (!desc->parent_irq)
> >  			return -EINVAL;
> > -		irq = desc->parent_irq;
> 
> Why delete this code?

OK, now I know you haven't read this code at all :-(.

> 
> >  	}
> >  
> > -	/* Set it pending and activate the softirq: */
> > -	set_bit(irq, irqs_resend);
> > +	/* Add to resend_list and activate the softirq: */
> > +	raw_spin_lock(&irq_resend_lock);
> > +	hlist_add_head(&desc->resend_node, &irq_resend_list);
> > +	raw_spin_unlock(&irq_resend_lock);
> 
> Do you consider a situation where irq_sw_resend() is running on two CPUs concurrently?
> If so, the same desc could be added into irq_resend_list twice by mistake.

Have you looked at the calling site (stress on singular), the locking
requirements, and the role IRQS_REPLAY plays when it comes to queuing
an interrupt for resend?

	M.
  
Thomas Gleixner May 29, 2023, 9:51 p.m. UTC | #3
On Mon, May 29 2023 at 15:57, Chang Liao wrote:
> On 2023/5/19 21:49, Shanker Donthineni wrote:
>> diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
>> index 844a8e30e6de..d9451d456a73 100644
>> --- a/include/linux/irqdesc.h
>> +++ b/include/linux/irqdesc.h
>> @@ -102,6 +102,9 @@ struct irq_desc {
>>  	int			parent_irq;
>>  	struct module		*owner;
>>  	const char		*name;
>> +#ifdef CONFIG_HARDIRQS_SW_RESEND
>> +	struct hlist_node	resend_node;
>> +#endif
>>  } ____cacheline_internodealigned_in_smp;
>
> Although there is no documented rule that limits changes to the KABI
> struct irq_desc, it is still better to keep the irq_desc definition
> stable.

Please read and understand:

       Documentation/process/stable-api-nonsense.rst

If you want KABI, then that's  _YOUR_ problem, period.

>> -/* Bitmap to handle software resend of interrupts: */
>> -static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS);
>> +/* hlist_head to handle software resend of interrupts: */
>> +static HLIST_HEAD(irq_resend_list);
>> +static DEFINE_RAW_SPINLOCK(irq_resend_lock);
>
> What is the benefit of using hlist here? If you want to enjoy the
> low latency of querying elements by key, you must define a hlist table
> with a reasonable number of buckets. Otherwise, I don't think the time
> complexity of hlist is better than a regular double-linked list,
> right?

What's complex about hlist in this case? Please explain.

Thanks,

        tglx
  
Liao Chang May 30, 2023, 1:44 a.m. UTC | #4
Hi, Marc

On 2023/5/29 16:48, Marc Zyngier wrote:
> On Mon, 29 May 2023 08:57:07 +0100,
> "Liao, Chang" <liaochang1@huawei.com> wrote:
>>
>> Hi, Shanker
>>
>> On 2023/5/19 21:49, Shanker Donthineni wrote:
>>> The current implementation utilizes a bitmap for managing IRQ resend
>>> handlers, which is allocated based on the SPARSE_IRQ/NR_IRQS macros.
>>> However, this method may not efficiently utilize memory during runtime,
>>> particularly when IRQ_BITMAP_BITS is large.
>>>
>>> Address this issue by using the hlist to manage IRQ resend handlers
>>> instead of relying on a static bitmap memory allocation. Additionally,
>>> a new function, clear_irq_resend(), is introduced and called from
>>> irq_shutdown to ensure a graceful teardown of the interrupt.
>>>
>>> Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
>>> ---
>>>  include/linux/irqdesc.h |  3 +++
>>>  kernel/irq/chip.c       |  1 +
>>>  kernel/irq/internals.h  |  2 ++
>>>  kernel/irq/irqdesc.c    |  2 ++
>>>  kernel/irq/resend.c     | 47 ++++++++++++++++++++++++++---------------
>>>  5 files changed, 38 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
>>> index 844a8e30e6de..d9451d456a73 100644
>>> --- a/include/linux/irqdesc.h
>>> +++ b/include/linux/irqdesc.h
>>> @@ -102,6 +102,9 @@ struct irq_desc {
>>>  	int			parent_irq;
>>>  	struct module		*owner;
>>>  	const char		*name;
>>> +#ifdef CONFIG_HARDIRQS_SW_RESEND
>>> +	struct hlist_node	resend_node;
>>> +#endif
>>>  } ____cacheline_internodealigned_in_smp;
>>
>> Although there is no documented rule that limits changes to the KABI
>> struct irq_desc, it is still better to keep the irq_desc definition stable.
> 
> On what grounds? There is no such thing as a stable in-kernel ABI,
> specially for things as internal as irq_desc.

Okay, helpful.

> 
>>>  
>>>  #ifdef CONFIG_SPARSE_IRQ
>>> diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
>>> index 49e7bc871fec..2eac5532c3c8 100644
>>> --- a/kernel/irq/chip.c
>>> +++ b/kernel/irq/chip.c
>>> @@ -306,6 +306,7 @@ static void __irq_disable(struct irq_desc *desc, bool mask);
>>>  void irq_shutdown(struct irq_desc *desc)
>>>  {
>>>  	if (irqd_is_started(&desc->irq_data)) {
>>> +		clear_irq_resend(desc);
>>>  		desc->depth = 1;
>>>  		if (desc->irq_data.chip->irq_shutdown) {
>>>  			desc->irq_data.chip->irq_shutdown(&desc->irq_data);
>>> diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
>>> index 5fdc0b557579..51fc8c497c22 100644
>>> --- a/kernel/irq/internals.h
>>> +++ b/kernel/irq/internals.h
>>> @@ -113,6 +113,8 @@ irqreturn_t handle_irq_event(struct irq_desc *desc);
>>>  
>>>  /* Resending of interrupts :*/
>>>  int check_irq_resend(struct irq_desc *desc, bool inject);
>>> +void clear_irq_resend(struct irq_desc *desc);
>>> +void irq_resend_init(struct irq_desc *desc);
>>>  bool irq_wait_for_poll(struct irq_desc *desc);
>>>  void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action);
>>>  
>>> diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
>>> index 240e145e969f..b401b89b226a 100644
>>> --- a/kernel/irq/irqdesc.c
>>> +++ b/kernel/irq/irqdesc.c
>>> @@ -415,6 +415,7 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags,
>>>  	desc_set_defaults(irq, desc, node, affinity, owner);
>>>  	irqd_set(&desc->irq_data, flags);
>>>  	kobject_init(&desc->kobj, &irq_kobj_type);
>>> +	irq_resend_init(desc);
>>>  
>>>  	return desc;
>>>  
>>> @@ -581,6 +582,7 @@ int __init early_irq_init(void)
>>>  		mutex_init(&desc[i].request_mutex);
>>>  		init_waitqueue_head(&desc[i].wait_for_threads);
>>>  		desc_set_defaults(i, &desc[i], node, NULL, NULL);
>>> +		irq_resend_init(desc);
>>>  	}
>>>  	return arch_early_irq_init();
>>>  }
>>> diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
>>> index 0c46e9fe3a89..edec335c0a7a 100644
>>> --- a/kernel/irq/resend.c
>>> +++ b/kernel/irq/resend.c
>>> @@ -21,8 +21,9 @@
>>>  
>>>  #ifdef CONFIG_HARDIRQS_SW_RESEND
>>>  
>>> -/* Bitmap to handle software resend of interrupts: */
>>> -static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS);
>>> +/* hlist_head to handle software resend of interrupts: */
>>> +static HLIST_HEAD(irq_resend_list);
>>> +static DEFINE_RAW_SPINLOCK(irq_resend_lock);
>>
>> What is the benefit of using hlist here? If you want to enjoy the
>> low latency of querying elements by key, you must define a hlist table
>> with a reasonable number of buckets. Otherwise, I don't think the time
>> complexity of hlist is better than a regular double-linked list, right?
> 
> You do realise that the list is processed in order, one element after
> the other, without ever querying any arbitrary element? Have you read
> the code?

Yes, so I *wonder* why not use a regular linked list here if there is no need
to do arbitrary querying. I have no doubt the idea of these changes is sound;
just curious about the data structure used to maintain resend IRQs.

> 
>>
>>>  
>>>  /*
>>>   * Run software resends of IRQ's
>>> @@ -30,18 +31,17 @@ static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS);
>>>  static void resend_irqs(struct tasklet_struct *unused)
>>>  {
>>>  	struct irq_desc *desc;
>>> -	int irq;
>>> -
>>> -	while (!bitmap_empty(irqs_resend, nr_irqs)) {
>>> -		irq = find_first_bit(irqs_resend, nr_irqs);
>>> -		clear_bit(irq, irqs_resend);
>>> -		desc = irq_to_desc(irq);
>>> -		if (!desc)
>>> -			continue;
>>> -		local_irq_disable();
>>> +
>>> +	raw_spin_lock_irq(&irq_resend_lock);
>>> +	while (!hlist_empty(&irq_resend_list)) {
>>> +		desc = hlist_entry(irq_resend_list.first, struct irq_desc,
>>> +				   resend_node);
>>> +		hlist_del_init(&desc->resend_node);
>>> +		raw_spin_unlock(&irq_resend_lock);
>>>  		desc->handle_irq(desc);
>>> -		local_irq_enable();
>>> +		raw_spin_lock(&irq_resend_lock);
>>>  	}
>>> +	raw_spin_unlock_irq(&irq_resend_lock);
>>>  }
>>>  
>>>  /* Tasklet to handle resend: */
>>> @@ -49,8 +49,6 @@ static DECLARE_TASKLET(resend_tasklet, resend_irqs);
>>>  
>>>  static int irq_sw_resend(struct irq_desc *desc)
>>>  {
>>> -	unsigned int irq = irq_desc_get_irq(desc);
>>> -
>>>  	/*
>>>  	 * Validate whether this interrupt can be safely injected from
>>>  	 * non interrupt context
>>> @@ -70,16 +68,31 @@ static int irq_sw_resend(struct irq_desc *desc)
>>>  		 */
>>>  		if (!desc->parent_irq)
>>>  			return -EINVAL;
>>> -		irq = desc->parent_irq;
>>
>> Why delete this code?
> 
> OK, now I know you haven't read this code at all :-(.

Sorry for the noise, I overlooked some core changes.

> 
>>
>>>  	}
>>>  
>>> -	/* Set it pending and activate the softirq: */
>>> -	set_bit(irq, irqs_resend);
>>> +	/* Add to resend_list and activate the softirq: */
>>> +	raw_spin_lock(&irq_resend_lock);
>>> +	hlist_add_head(&desc->resend_node, &irq_resend_list);
>>> +	raw_spin_unlock(&irq_resend_lock);
>>
>> Do you consider a situation where irq_sw_resend() is running on two CPUs concurrently?
>> If so, the same desc could be added into irq_resend_list twice by mistake.
> 
> Have you looked at the calling site (stress on singular), the locking
> requirements, and the role IRQS_REPLAY plays when it comes to queuing
> an interrupt for resend?

I see: the IRQS_REPLAY flag and the locking of irq_desc ensure that irq_sw_resend()
is not called on the same interrupt concurrently. Thanks for pointing that out.

> 
> 	M.
>
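
For readers following the exchange: the serialization Marc refers to lives in
check_irq_resend(), the single caller of irq_sw_resend(). It runs with
desc->lock held and uses IRQS_REPLAY as a latch that stays set until the
replayed interrupt actually fires. An abridged sketch, trimmed from
kernel/irq/resend.c of this era (error handling shortened):

	/*
	 * Caller holds desc->lock, and IRQS_REPLAY stays set until the
	 * resend runs, so one descriptor can never sit on the resend
	 * list twice.
	 */
	int check_irq_resend(struct irq_desc *desc, bool inject)
	{
		int err = 0;

		/* Level interrupts are resent by the hardware itself */
		if (irq_settings_is_level(desc)) {
			desc->istate &= ~IRQS_PENDING;
			return -EINVAL;
		}
		if (desc->istate & IRQS_REPLAY)	/* already queued */
			return -EBUSY;
		if (!(desc->istate & IRQS_PENDING) && !inject)
			return 0;

		desc->istate &= ~IRQS_PENDING;

		if (!try_retrigger(desc))
			err = irq_sw_resend(desc);

		/* Mark it replayed only if (re)triggering succeeded */
		if (!err)
			desc->istate |= IRQS_REPLAY;
		return err;
	}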
  
Liao Chang May 30, 2023, 1:59 a.m. UTC | #5
Hi, Thomas

On 2023/5/30 5:51, Thomas Gleixner wrote:
> On Mon, May 29 2023 at 15:57, Chang Liao wrote:
>> On 2023/5/19 21:49, Shanker Donthineni wrote:
>>> diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
>>> index 844a8e30e6de..d9451d456a73 100644
>>> --- a/include/linux/irqdesc.h
>>> +++ b/include/linux/irqdesc.h
>>> @@ -102,6 +102,9 @@ struct irq_desc {
>>>  	int			parent_irq;
>>>  	struct module		*owner;
>>>  	const char		*name;
>>> +#ifdef CONFIG_HARDIRQS_SW_RESEND
>>> +	struct hlist_node	resend_node;
>>> +#endif
>>>  } ____cacheline_internodealigned_in_smp;
>>
>> Although there is no documented rule that limits changes to the KABI
>> struct irq_desc, it is still better to keep the irq_desc definition
>> stable.
> 
> Please read and understand:
> 
>        Documentation/process/stable-api-nonsense.rst
> 
> If you want KABI, then that's  _YOUR_ problem, period.

Thanks for the guide.

> 
>>> -/* Bitmap to handle software resend of interrupts: */
>>> -static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS);
>>> +/* hlist_head to handle software resend of interrupts: */
>>> +static HLIST_HEAD(irq_resend_list);
>>> +static DEFINE_RAW_SPINLOCK(irq_resend_lock);
>>
>> What is the benefit of using hlist here? If you want to enjoy the
>> low latency of querying elements by key, you must define a hlist table
>> with a reasonable number of buckets. Otherwise, I don't think the time
>> complexity of hlist is better than a regular double-linked list,
>> right?
> 
> What's complex about hlist in this case? Please explain.

Honestly, it is not about the complexity. Perhaps I do not understand the
usage of hlist very deeply. I have searched some code in the kernel and
found that hlist is always used to speed up arbitrary querying, such as
searching for a registered kprobe by address. Back to this patch, these resend
IRQs are actually organized as a sequential list and traversed one by one for
handling. Further, comparing hlist_empty, hlist_add_head, and hlist_del_init
with their counterparts in list, it looks like a regular linked list is also
good enough.

Thanks.

> 
> Thanks,
> 
>         tglx
  
Marc Zyngier May 30, 2023, 7:27 a.m. UTC | #6
On Tue, 30 May 2023 02:44:05 +0100,
"Liao, Chang" <liaochang1@huawei.com> wrote:
> 
> >> What is the benefit of using hlist here? If you want to enjoy the
> >> low latency of querying elements by key, you must define a hlist table
> >> with a reasonable number of buckets. Otherwise, I don't think the time
> >> complexity of hlist is better than a regular double-linked list, right?
> > 
> > You do realise that the list is processed in order, one element after
> > the other, without ever querying any arbitrary element? Have you read
> > the code?
> 
> Yes, so I *wonder* why not use a regular linked list here if there is no need
> to do arbitrary querying. I have no doubt the idea of these changes is sound;
> just curious about the data structure used to maintain resend IRQs.

What about it? For the use case at hand, they result in the same
complexity. Unless you have spotted a corner case that results in a
non O(1) complexity?

	M.
  
Thomas Gleixner May 30, 2023, 12:19 p.m. UTC | #7
On Tue, May 30 2023 at 09:59, Chang Liao wrote:
> On 2023/5/30 5:51, Thomas Gleixner wrote:
>>> What is the benefit of using hlist here? If you want to enjoy the
>>> low latency of querying elements by key, you must define a hlist table
>>> with a reasonable number of buckets. Otherwise, I don't think the time
>>> complexity of hlist is better than a regular double-linked list,
>>> right?
>> 
>> What's complex about hlist in this case? Please explain.
>
> Honestly, it is not about the complexity. Perhaps I do not understand the
> usage of hlist very deeply. I have searched some code in the kernel and
> found that hlist is always used to speed up arbitrary querying, such as
> searching for a registered kprobe by address. Back to this patch, these resend
> IRQs are actually organized as a sequential list and traversed one by one for
> handling. Further, comparing hlist_empty, hlist_add_head, and hlist_del_init
> with their counterparts in list, it looks like a regular linked list is also
> good enough.

Sure that works too.

The main difference between regular linked lists and hlist is that the
list head of hlist is half the size of a regular double linked list.

The only downside of hlist is that there is no back link in the list
head to the tail. Searching for the tail is O(N) while on a double
linked list it's O(1).

Nothing in this use case needs to access the tail. So what's your
problem?

Thanks,

        tglx
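
The size difference tglx describes is visible directly in the definitions in
include/linux/types.h (reproduced here for reference, with comments added):

	struct list_head {	/* doubly linked: two pointers per head */
		struct list_head *next, *prev;
	};

	struct hlist_head {	/* one pointer per head: half the size */
		struct hlist_node *first;
	};

	struct hlist_node {	/* nodes keep two links (pprev points at the
				 * previous node's ->next), so unlinking an
				 * arbitrary node is still O(1) */
		struct hlist_node *next, **pprev;
	};

With a single global resend list the head-size saving is negligible either
way; both types give O(1) add-to-head and pop-from-front, and nothing here
ever needs the tail, which is why the choice between them is a wash.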
  
Liao Chang June 2, 2023, 1:36 a.m. UTC | #8
On 2023/5/30 20:19, Thomas Gleixner wrote:
> On Tue, May 30 2023 at 09:59, Chang Liao wrote:
>> On 2023/5/30 5:51, Thomas Gleixner wrote:
>>>> What is the benefit of using hlist here? If you want to enjoy the
>>>> low latency of querying elements by key, you must define a hlist table
>>>> with a reasonable number of buckets. Otherwise, I don't think the time
>>>> complexity of hlist is better than a regular double-linked list,
>>>> right?
>>>
>>> What's complex about hlist in this case? Please explain.
>>
>> Honestly, it is not about the complexity. Perhaps I do not understand the
>> usage of hlist very deeply. I have searched some code in the kernel and
>> found that hlist is always used to speed up arbitrary querying, such as
>> searching for a registered kprobe by address. Back to this patch, these resend
>> IRQs are actually organized as a sequential list and traversed one by one for
>> handling. Further, comparing hlist_empty, hlist_add_head, and hlist_del_init
>> with their counterparts in list, it looks like a regular linked list is also
>> good enough.
> 
> Sure that works too.
> 
> The main difference between regular linked lists and hlist is that the
> list head of hlist is half the size of a regular double linked list.
> 
> The only downside of hlist is that there is no back link in the list
> head to the tail. Searching for the tail is O(N) while on a double
> linked list it's O(1).
> 
> Nothing in this use case needs to access the tail. So what's your
> problem?

Oh, that is the point. Your explanation made it all clear; my problem is solved.
Thanks!

> 
> Thanks,
> 
>         tglx
  

Patch

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 844a8e30e6de..d9451d456a73 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -102,6 +102,9 @@  struct irq_desc {
 	int			parent_irq;
 	struct module		*owner;
 	const char		*name;
+#ifdef CONFIG_HARDIRQS_SW_RESEND
+	struct hlist_node	resend_node;
+#endif
 } ____cacheline_internodealigned_in_smp;
 
 #ifdef CONFIG_SPARSE_IRQ
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 49e7bc871fec..2eac5532c3c8 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -306,6 +306,7 @@  static void __irq_disable(struct irq_desc *desc, bool mask);
 void irq_shutdown(struct irq_desc *desc)
 {
 	if (irqd_is_started(&desc->irq_data)) {
+		clear_irq_resend(desc);
 		desc->depth = 1;
 		if (desc->irq_data.chip->irq_shutdown) {
 			desc->irq_data.chip->irq_shutdown(&desc->irq_data);
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 5fdc0b557579..51fc8c497c22 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -113,6 +113,8 @@  irqreturn_t handle_irq_event(struct irq_desc *desc);
 
 /* Resending of interrupts :*/
 int check_irq_resend(struct irq_desc *desc, bool inject);
+void clear_irq_resend(struct irq_desc *desc);
+void irq_resend_init(struct irq_desc *desc);
 bool irq_wait_for_poll(struct irq_desc *desc);
 void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action);
 
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 240e145e969f..b401b89b226a 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -415,6 +415,7 @@  static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags,
 	desc_set_defaults(irq, desc, node, affinity, owner);
 	irqd_set(&desc->irq_data, flags);
 	kobject_init(&desc->kobj, &irq_kobj_type);
+	irq_resend_init(desc);
 
 	return desc;
 
@@ -581,6 +582,7 @@  int __init early_irq_init(void)
 		mutex_init(&desc[i].request_mutex);
 		init_waitqueue_head(&desc[i].wait_for_threads);
 		desc_set_defaults(i, &desc[i], node, NULL, NULL);
+		irq_resend_init(desc);
 	}
 	return arch_early_irq_init();
 }
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index 0c46e9fe3a89..edec335c0a7a 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -21,8 +21,9 @@ 
 
 #ifdef CONFIG_HARDIRQS_SW_RESEND
 
-/* Bitmap to handle software resend of interrupts: */
-static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS);
+/* hlist_head to handle software resend of interrupts: */
+static HLIST_HEAD(irq_resend_list);
+static DEFINE_RAW_SPINLOCK(irq_resend_lock);
 
 /*
  * Run software resends of IRQ's
@@ -30,18 +31,17 @@  static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS);
 static void resend_irqs(struct tasklet_struct *unused)
 {
 	struct irq_desc *desc;
-	int irq;
-
-	while (!bitmap_empty(irqs_resend, nr_irqs)) {
-		irq = find_first_bit(irqs_resend, nr_irqs);
-		clear_bit(irq, irqs_resend);
-		desc = irq_to_desc(irq);
-		if (!desc)
-			continue;
-		local_irq_disable();
+
+	raw_spin_lock_irq(&irq_resend_lock);
+	while (!hlist_empty(&irq_resend_list)) {
+		desc = hlist_entry(irq_resend_list.first, struct irq_desc,
+				   resend_node);
+		hlist_del_init(&desc->resend_node);
+		raw_spin_unlock(&irq_resend_lock);
 		desc->handle_irq(desc);
-		local_irq_enable();
+		raw_spin_lock(&irq_resend_lock);
 	}
+	raw_spin_unlock_irq(&irq_resend_lock);
 }
 
 /* Tasklet to handle resend: */
@@ -49,8 +49,6 @@  static DECLARE_TASKLET(resend_tasklet, resend_irqs);
 
 static int irq_sw_resend(struct irq_desc *desc)
 {
-	unsigned int irq = irq_desc_get_irq(desc);
-
 	/*
 	 * Validate whether this interrupt can be safely injected from
 	 * non interrupt context
@@ -70,16 +68,31 @@  static int irq_sw_resend(struct irq_desc *desc)
 		 */
 		if (!desc->parent_irq)
 			return -EINVAL;
-		irq = desc->parent_irq;
 	}
 
-	/* Set it pending and activate the softirq: */
-	set_bit(irq, irqs_resend);
+	/* Add to resend_list and activate the softirq: */
+	raw_spin_lock(&irq_resend_lock);
+	hlist_add_head(&desc->resend_node, &irq_resend_list);
+	raw_spin_unlock(&irq_resend_lock);
 	tasklet_schedule(&resend_tasklet);
 	return 0;
 }
 
+void clear_irq_resend(struct irq_desc *desc)
+{
+	raw_spin_lock(&irq_resend_lock);
+	hlist_del_init(&desc->resend_node);
+	raw_spin_unlock(&irq_resend_lock);
+}
+
+void irq_resend_init(struct irq_desc *desc)
+{
+	INIT_HLIST_NODE(&desc->resend_node);
+}
 #else
+void clear_irq_resend(struct irq_desc *desc) {}
+void irq_resend_init(struct irq_desc *desc) {}
+
 static int irq_sw_resend(struct irq_desc *desc)
 {
 	return -EINVAL;