[v21,2/7] crash: add generic infrastructure for crash hotplug support

Message ID 20230404180326.6890-3-eric.devolder@oracle.com
State New
Headers
Series crash: Kernel handling of CPU and memory hot un/plug |

Commit Message

Eric DeVolder April 4, 2023, 6:03 p.m. UTC
  To support crash hotplug, a mechanism is needed to update the crash
elfcorehdr upon CPU or memory changes (eg. hot un/plug or off/
onlining). The crash elfcorehdr describes the CPUs and memory to
be written into the vmcore.

To track CPU changes, callbacks are registered with the cpuhp
mechanism via cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN). The
crash hotplug elfcorehdr update has no explicit ordering requirement
(relative to other cpuhp states), so meets the criteria for
utilizing CPUHP_BP_PREPARE_DYN. CPUHP_BP_PREPARE_DYN is a dynamic
state and avoids the need to introduce a new state for crash
hotplug. Also, this is the last state in the PREPARE group, just
prior to the STARTING group, which is very close to the CPU
starting up in a plug/online situation, or stopping in a unplug/
offline situation. This minimizes the window of time during an
actual plug/online or unplug/offline situation in which the
elfcorehdr would be inaccurate. Note that for a CPU being unplugged
or offlined, the CPU will still be present in the list of CPUs
generated by crash_prepare_elf64_headers(). However, there is no
need to explicitly omit the CPU, see the discussion in the patch
'crash: change crash_prepare_elf64_headers() to
for_each_possible_cpu()'.

To track memory changes, a notifier is registered to capture the
memblock MEM_ONLINE and MEM_OFFLINE events via register_memory_notifier().

The CPU callbacks and memory notifiers invoke crash_handle_hotplug_event()
which performs needed tasks and then dispatches the event to the
architecture specific arch_crash_handle_hotplug_event() to update the
elfcorehdr with the current state of CPUs and memory. During the
process, the kexec_lock is held.

Signed-off-by: Eric DeVolder <eric.devolder@oracle.com>
Reviewed-by: Sourabh Jain <sourabhjain@linux.ibm.com>
---
 include/linux/crash_core.h |   9 +++
 include/linux/kexec.h      |  11 +++
 kernel/crash_core.c        | 142 +++++++++++++++++++++++++++++++++++++
 kernel/kexec_core.c        |   6 ++
 4 files changed, 168 insertions(+)
  

Comments

Baoquan He April 6, 2023, 11:04 a.m. UTC | #1
On 04/04/23 at 02:03pm, Eric DeVolder wrote:
......
> +static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
> +{
> +	struct kimage *image;
> +
> +	/* Obtain lock while changing crash information */
> +	if (!kexec_trylock()) {
> +		pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n");
> +		return;
> +	}
> +
> +	/* Check kdump is not loaded */
> +	if (!kexec_crash_image)
> +		goto out;
> +
> +	image = kexec_crash_image;
> +
> +	if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
> +		hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
> +		pr_debug("hp_action %u, cpu %u\n", hp_action, cpu);
> +	else
> +		pr_debug("hp_action %u\n", hp_action);

Seems we passed in the cpu number just for printing here. Wondering why
we don't print out hot added/removed memory ranges. Is the cpu number
printing necessary?

> +
> +	/*
> +	 * When the struct kimage is allocated, the elfcorehdr_index
> +	 * is set to -1. Find the segment containing the elfcorehdr,
> +	 * if not already found. This works for both the kexec_load
> +	 * and kexec_file_load paths.
> +	 */
> +	if (image->elfcorehdr_index < 0) {
> +		unsigned long mem;
> +		unsigned char *ptr;
> +		unsigned int n;
> +
> +		for (n = 0; n < image->nr_segments; n++) {
> +			mem = image->segment[n].mem;
> +			ptr = kmap_local_page(pfn_to_page(mem >> PAGE_SHIFT));
> +			if (ptr) {
> +				/* The segment containing elfcorehdr */
> +				if (memcmp(ptr, ELFMAG, SELFMAG) == 0)
> +					image->elfcorehdr_index = (int)n;
> +				kunmap_local(ptr);
> +			}
> +		}
> +	}
> +
> +	if (image->elfcorehdr_index < 0) {
> +		pr_err("unable to locate elfcorehdr segment");
> +		goto out;
> +	}
> +
> +	/* Needed in order for the segments to be updated */
> +	arch_kexec_unprotect_crashkres();
> +
> +	/* Differentiate between normal load and hotplug update */
> +	image->hp_action = hp_action;
> +
> +	/* Now invoke arch-specific update handler */
> +	arch_crash_handle_hotplug_event(image);
> +
> +	/* No longer handling a hotplug event */
> +	image->hp_action = KEXEC_CRASH_HP_NONE;
> +	image->elfcorehdr_updated = true;
> +
> +	/* Change back to read-only */
> +	arch_kexec_protect_crashkres();
> +
> +out:
> +	/* Release lock now that update complete */
> +	kexec_unlock();
> +}
> +
> +static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v)
> +{
> +	switch (val) {
> +	case MEM_ONLINE:
> +		crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY,
> +			KEXEC_CRASH_HP_INVALID_CPU);
> +		break;
> +
> +	case MEM_OFFLINE:
> +		crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY,
> +			KEXEC_CRASH_HP_INVALID_CPU);
> +		break;
> +	}
> +	return NOTIFY_OK;
> +}
> +
> +static struct notifier_block crash_memhp_nb = {
> +	.notifier_call = crash_memhp_notifier,
> +	.priority = 0
> +};
> +
> +static int crash_cpuhp_online(unsigned int cpu)
> +{
> +	crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_CPU, cpu);
> +	return 0;
> +}
> +
> +static int crash_cpuhp_offline(unsigned int cpu)
> +{
> +	crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_CPU, cpu);
> +	return 0;
> +}
> +
> +static int __init crash_hotplug_init(void)
> +{
> +	int result = 0;
> +
> +	if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
> +		register_memory_notifier(&crash_memhp_nb);
> +
> +	if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
> +		result = cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN,
> +			"crash/cpuhp", crash_cpuhp_online, crash_cpuhp_offline);
> +	}
> +
> +	return result;
> +}
> +
> +subsys_initcall(crash_hotplug_init);
> +#endif
> diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
> index 3d578c6fefee..8296d019737c 100644
> --- a/kernel/kexec_core.c
> +++ b/kernel/kexec_core.c
> @@ -277,6 +277,12 @@ struct kimage *do_kimage_alloc_init(void)
>  	/* Initialize the list of unusable pages */
>  	INIT_LIST_HEAD(&image->unusable_pages);
>  
> +#ifdef CONFIG_CRASH_HOTPLUG
> +	image->hp_action = KEXEC_CRASH_HP_NONE;
> +	image->elfcorehdr_index = -1;
> +	image->elfcorehdr_updated = false;
> +#endif
> +
>  	return image;
>  }
>  
> -- 
> 2.31.1
>
  
Eric DeVolder April 6, 2023, 4:10 p.m. UTC | #2
On 4/6/23 06:04, Baoquan He wrote:
> On 04/04/23 at 02:03pm, Eric DeVolder wrote:
> ......
>> +static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
>> +{
>> +	struct kimage *image;
>> +
>> +	/* Obtain lock while changing crash information */
>> +	if (!kexec_trylock()) {
>> +		pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n");
>> +		return;
>> +	}
>> +
>> +	/* Check kdump is not loaded */
>> +	if (!kexec_crash_image)
>> +		goto out;
>> +
>> +	image = kexec_crash_image;
>> +
>> +	if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
>> +		hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
>> +		pr_debug("hp_action %u, cpu %u\n", hp_action, cpu);
>> +	else
>> +		pr_debug("hp_action %u\n", hp_action);
> 
> Seems we passed in the cpu number just for printing here. Wondering why
> we don't print out hot added/removed memory ranges. Is the cpu number
> printing necessary?
> 
Baoquan,

Ah, actually until recently it was used to track the 'offlinecpu' in this function, but tglx pointed 
out that was un-necessary. That resulted in dropping the code in this function dealing with 
offlinecpu, leaving this as its only use in this function.

The printing of cpu number is not necessary, but helpful; I use it for debugging.

The printing of memory range is also not necessary, but in order to do that, should we choose to do 
so, requires passing in the memory range to this function. This patch series did do this early on, 
and by v7 I dropped it at your urging 
(https://lore.kernel.org/lkml/20220401183040.1624-1-eric.devolder@oracle.com/). At the time, I 
provided it since I considered this generic infrastructure, but I could not defend it since x86 
didn't need it. However, PPC now needs this, and is now carrying this as part of PPC support of 
CRASH_HOTPLUG 
(https://lore.kernel.org/linuxppc-dev/20230312181154.278900-6-sourabhjain@linux.ibm.com/T/#u).

If you'd rather I pickup the memory range handling again, I can do that. I think I'd likely change 
this function to be:

   void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu,
      struct memory_notify *mhp);

where on a CPU op the 'cpu' parameter would be valid and 'mhp' NULL, and on a memory op,
the 'mhp' would be valid and 'cpu' parameter invalid(0).

I'd likely then stuff these two parameters into struct kimage so that it can be utilized by 
arch-specific handler, if needed.

And of course, would print out the memory range for debug purposes.

Let me know what you think.
eric


>> +
>> +	/*
>> +	 * When the struct kimage is allocated, the elfcorehdr_index
>> +	 * is set to -1. Find the segment containing the elfcorehdr,
>> +	 * if not already found. This works for both the kexec_load
>> +	 * and kexec_file_load paths.
>> +	 */
>> +	if (image->elfcorehdr_index < 0) {
>> +		unsigned long mem;
>> +		unsigned char *ptr;
>> +		unsigned int n;
>> +
>> +		for (n = 0; n < image->nr_segments; n++) {
>> +			mem = image->segment[n].mem;
>> +			ptr = kmap_local_page(pfn_to_page(mem >> PAGE_SHIFT));
>> +			if (ptr) {
>> +				/* The segment containing elfcorehdr */
>> +				if (memcmp(ptr, ELFMAG, SELFMAG) == 0)
>> +					image->elfcorehdr_index = (int)n;
>> +				kunmap_local(ptr);
>> +			}
>> +		}
>> +	}
>> +
>> +	if (image->elfcorehdr_index < 0) {
>> +		pr_err("unable to locate elfcorehdr segment");
>> +		goto out;
>> +	}
>> +
>> +	/* Needed in order for the segments to be updated */
>> +	arch_kexec_unprotect_crashkres();
>> +
>> +	/* Differentiate between normal load and hotplug update */
>> +	image->hp_action = hp_action;
>> +
>> +	/* Now invoke arch-specific update handler */
>> +	arch_crash_handle_hotplug_event(image);
>> +
>> +	/* No longer handling a hotplug event */
>> +	image->hp_action = KEXEC_CRASH_HP_NONE;
>> +	image->elfcorehdr_updated = true;
>> +
>> +	/* Change back to read-only */
>> +	arch_kexec_protect_crashkres();
>> +
>> +out:
>> +	/* Release lock now that update complete */
>> +	kexec_unlock();
>> +}
>> +
>> +static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v)
>> +{
>> +	switch (val) {
>> +	case MEM_ONLINE:
>> +		crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY,
>> +			KEXEC_CRASH_HP_INVALID_CPU);
>> +		break;
>> +
>> +	case MEM_OFFLINE:
>> +		crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY,
>> +			KEXEC_CRASH_HP_INVALID_CPU);
>> +		break;
>> +	}
>> +	return NOTIFY_OK;
>> +}
>> +
>> +static struct notifier_block crash_memhp_nb = {
>> +	.notifier_call = crash_memhp_notifier,
>> +	.priority = 0
>> +};
>> +
>> +static int crash_cpuhp_online(unsigned int cpu)
>> +{
>> +	crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_CPU, cpu);
>> +	return 0;
>> +}
>> +
>> +static int crash_cpuhp_offline(unsigned int cpu)
>> +{
>> +	crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_CPU, cpu);
>> +	return 0;
>> +}
>> +
>> +static int __init crash_hotplug_init(void)
>> +{
>> +	int result = 0;
>> +
>> +	if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
>> +		register_memory_notifier(&crash_memhp_nb);
>> +
>> +	if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
>> +		result = cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN,
>> +			"crash/cpuhp", crash_cpuhp_online, crash_cpuhp_offline);
>> +	}
>> +
>> +	return result;
>> +}
>> +
>> +subsys_initcall(crash_hotplug_init);
>> +#endif
>> diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
>> index 3d578c6fefee..8296d019737c 100644
>> --- a/kernel/kexec_core.c
>> +++ b/kernel/kexec_core.c
>> @@ -277,6 +277,12 @@ struct kimage *do_kimage_alloc_init(void)
>>   	/* Initialize the list of unusable pages */
>>   	INIT_LIST_HEAD(&image->unusable_pages);
>>   
>> +#ifdef CONFIG_CRASH_HOTPLUG
>> +	image->hp_action = KEXEC_CRASH_HP_NONE;
>> +	image->elfcorehdr_index = -1;
>> +	image->elfcorehdr_updated = false;
>> +#endif
>> +
>>   	return image;
>>   }
>>   
>> -- 
>> 2.31.1
>>
>
  
Baoquan He April 6, 2023, 11:58 p.m. UTC | #3
On 04/06/23 at 11:10am, Eric DeVolder wrote:
> 
> 
> On 4/6/23 06:04, Baoquan He wrote:
> > On 04/04/23 at 02:03pm, Eric DeVolder wrote:
> > ......
> > > +static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
> > > +{
> > > +	struct kimage *image;
> > > +
> > > +	/* Obtain lock while changing crash information */
> > > +	if (!kexec_trylock()) {
> > > +		pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n");
> > > +		return;
> > > +	}
> > > +
> > > +	/* Check kdump is not loaded */
> > > +	if (!kexec_crash_image)
> > > +		goto out;
> > > +
> > > +	image = kexec_crash_image;
> > > +
> > > +	if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
> > > +		hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
> > > +		pr_debug("hp_action %u, cpu %u\n", hp_action, cpu);
> > > +	else
> > > +		pr_debug("hp_action %u\n", hp_action);
> > 
> > Seems we passed in the cpu number just for printing here. Wondering why
> > we don't print out hot added/removed memory ranges. Is the cpu number
> > printing necessary?
> > 
> Baoquan,
> 
> Ah, actually until recently it was used to track the 'offlinecpu' in this
> function, but tglx pointed out that was un-necessary. That resulted in
> dropping the code in this function dealing with offlinecpu, leaving this as
> its only use in this function.
> 
> The printing of cpu number is not necessary, but helpful; I use it for debugging.

OK, I see. I am not requesting memory range printing, just try to prove
cpu number printing is not so justified. If it's helpful, I am OK with
it. Let's see if other people have concern about this.

> 
> The printing of memory range is also not necessary, but in order to do that,
> should we choose to do so, requires passing in the memory range to this
> function. This patch series did do this early on, and by v7 I dropped it at
> your urging (https://lore.kernel.org/lkml/20220401183040.1624-1-eric.devolder@oracle.com/).
> At the time, I provided it since I considered this generic infrastructure,
> but I could not defend it since x86 didn't need it. However, PPC now needs
> this, and is now carrying this as part of PPC support of CRASH_HOTPLUG (https://lore.kernel.org/linuxppc-dev/20230312181154.278900-6-sourabhjain@linux.ibm.com/T/#u).
> 
> If you'd rather I pickup the memory range handling again, I can do that. I
> think I'd likely change this function to be:
> 
>   void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu,
>      struct memory_notify *mhp);
> 
> where on a CPU op the 'cpu' parameter would be valid and 'mhp' NULL, and on a memory op,
> the 'mhp' would be valid and 'cpu' parameter invalid(0).
> 
> I'd likely then stuff these two parameters into struct kimage so that it can
> be utilized by arch-specific handler, if needed.
> 
> And of course, would print out the memory range for debug purposes.
> 
> Let me know what you think.
  
Sourabh Jain April 12, 2023, 8:38 a.m. UTC | #4
On 06/04/23 21:40, Eric DeVolder wrote:
>
>
> On 4/6/23 06:04, Baoquan He wrote:
>> On 04/04/23 at 02:03pm, Eric DeVolder wrote:
>> ......
>>> +static void crash_handle_hotplug_event(unsigned int hp_action, 
>>> unsigned int cpu)
>>> +{
>>> +    struct kimage *image;
>>> +
>>> +    /* Obtain lock while changing crash information */
>>> +    if (!kexec_trylock()) {
>>> +        pr_info("kexec_trylock() failed, elfcorehdr may be 
>>> inaccurate\n");
>>> +        return;
>>> +    }
>>> +
>>> +    /* Check kdump is not loaded */
>>> +    if (!kexec_crash_image)
>>> +        goto out;
>>> +
>>> +    image = kexec_crash_image;
>>> +
>>> +    if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
>>> +        hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
>>> +        pr_debug("hp_action %u, cpu %u\n", hp_action, cpu);
>>> +    else
>>> +        pr_debug("hp_action %u\n", hp_action);
>>
>> Seems we passed in the cpu number just for printing here. Wondering why
>> we don't print out hot added/removed memory ranges. Is the cpu number
>> printing necessary?
>>
> Baoquan,
>
> Ah, actually until recently it was used to track the 'offlinecpu' in 
> this function, but tglx pointed out that was un-necessary. That 
> resulted in dropping the code in this function dealing with 
> offlinecpu, leaving this as its only use in this function.
>
> The printing of cpu number is not necessary, but helpful; I use it for 
> debugging.
>
> The printing of memory range is also not necessary, but in order to do 
> that, should we choose to do so, requires passing in the memory range 
> to this function. This patch series did do this early on, and by v7 I 
> dropped it at your urging 
> (https://lore.kernel.org/lkml/20220401183040.1624-1-eric.devolder@oracle.com/). 
> At the time, I provided it since I considered this generic 
> infrastructure, but I could not defend it since x86 didn't need it. 
> However, PPC now needs this, and is now carrying this as part of PPC 
> support of CRASH_HOTPLUG 
> (https://lore.kernel.org/linuxppc-dev/20230312181154.278900-6-sourabhjain@linux.ibm.com/T/#u).
>
> If you'd rather I pickup the memory range handling again, I can do 
> that. I think I'd likely change this function to be:
>
>   void crash_handle_hotplug_event(unsigned int hp_action, unsigned int 
> cpu,
>      struct memory_notify *mhp);
>
> where on a CPU op the 'cpu' parameter would be valid and 'mhp' NULL, 
> and on a memory op,
> the 'mhp' would be valid and 'cpu' parameter invalid(0).
>
> I'd likely then stuff these two parameters into struct kimage so that 
> it can be utilized by arch-specific handler, if needed.
>
> And of course, would print out the memory range for debug purposes.

I think passing memory_notify as parameter is a better approach compare 
to adding the
same into struct kimage. Because once the crash hotplug event is served 
the memory_notify
object is not useful.

- Sourabh
  
Eric DeVolder April 18, 2023, 1:55 p.m. UTC | #5
On 4/6/23 18:58, Baoquan He wrote:
> On 04/06/23 at 11:10am, Eric DeVolder wrote:
>>
>>
>> On 4/6/23 06:04, Baoquan He wrote:
>>> On 04/04/23 at 02:03pm, Eric DeVolder wrote:
>>> ......
>>>> +static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
>>>> +{
>>>> +	struct kimage *image;
>>>> +
>>>> +	/* Obtain lock while changing crash information */
>>>> +	if (!kexec_trylock()) {
>>>> +		pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n");
>>>> +		return;
>>>> +	}
>>>> +
>>>> +	/* Check kdump is not loaded */
>>>> +	if (!kexec_crash_image)
>>>> +		goto out;
>>>> +
>>>> +	image = kexec_crash_image;
>>>> +
>>>> +	if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
>>>> +		hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
>>>> +		pr_debug("hp_action %u, cpu %u\n", hp_action, cpu);
>>>> +	else
>>>> +		pr_debug("hp_action %u\n", hp_action);
>>>
>>> Seems we passed in the cpu number just for printing here. Wondering why
>>> we don't print out hot added/removed memory ranges. Is the cpu number
>>> printing necessary?
>>>
>> Baoquan,
>>
>> Ah, actually until recently it was used to track the 'offlinecpu' in this
>> function, but tglx pointed out that was un-necessary. That resulted in
>> dropping the code in this function dealing with offlinecpu, leaving this as
>> its only use in this function.
>>
>> The printing of cpu number is not necessary, but helpful; I use it for debugging.
> 
> OK, I see. I am not requesting memory range printing, just try to prove
> cpu number printing is not so justified. If it's helpful, I am OK with
> it. Let's see if other people have concern about this.
> 

I do not plan on adding the memory range printing.

>>
>> The printing of memory range is also not necessary, but in order to do that,
>> should we choose to do so, requires passing in the memory range to this
>> function. This patch series did do this early on, and by v7 I dropped it at
>> your urging (https://lore.kernel.org/lkml/20220401183040.1624-1-eric.devolder@oracle.com/).
>> At the time, I provided it since I considered this generic infrastructure,
>> but I could not defend it since x86 didn't need it. However, PPC now needs
>> this, and is now carrying this as part of PPC support of CRASH_HOTPLUG (https://lore.kernel.org/linuxppc-dev/20230312181154.278900-6-sourabhjain@linux.ibm.com/T/#u).
>>
>> If you'd rather I pickup the memory range handling again, I can do that. I
>> think I'd likely change this function to be:
>>
>>    void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu,
>>       struct memory_notify *mhp);
>>
>> where on a CPU op the 'cpu' parameter would be valid and 'mhp' NULL, and on a memory op,
>> the 'mhp' would be valid and 'cpu' parameter invalid(0).
>>
>> I'd likely then stuff these two parameters into struct kimage so that it can
>> be utilized by arch-specific handler, if needed.
>>
>> And of course, would print out the memory range for debug purposes.
>>
>> Let me know what you think.
> 

I do not plan on adding the memory range handling; I'll let Sourabh do that as he has a use case for it.

As such, I don't see any other request for changes.

Thanks!
eric
  
Baoquan He April 19, 2023, 12:05 a.m. UTC | #6
On 04/18/23 at 08:55am, Eric DeVolder wrote:
......
> > > > Seems we passed in the cpu number just for printing here. Wondering why
> > > > we don't print out hot added/removed memory ranges. Is the cpu number
> > > > printing necessary?
> > > > 
> > > Baoquan,
> > > 
> > > Ah, actually until recently it was used to track the 'offlinecpu' in this
> > > function, but tglx pointed out that was un-necessary. That resulted in
> > > dropping the code in this function dealing with offlinecpu, leaving this as
> > > its only use in this function.
> > > 
> > > The printing of cpu number is not necessary, but helpful; I use it for debugging.
> > 
> > OK, I see. I am not requesting memory range printing, just try to prove
> > cpu number printing is not so justified. If it's helpful, I am OK with
> > it. Let's see if other people have concern about this.
> > 
> 
> I do not plan on adding the memory range printing.
> 
> > > 
> > > The printing of memory range is also not necessary, but in order to do that,
> > > should we choose to do so, requires passing in the memory range to this
> > > function. This patch series did do this early on, and by v7 I dropped it at
> > > your urging (https://lore.kernel.org/lkml/20220401183040.1624-1-eric.devolder@oracle.com/).
> > > At the time, I provided it since I considered this generic infrastructure,
> > > but I could not defend it since x86 didn't need it. However, PPC now needs
> > > this, and is now carrying this as part of PPC support of CRASH_HOTPLUG (https://lore.kernel.org/linuxppc-dev/20230312181154.278900-6-sourabhjain@linux.ibm.com/T/#u).
> > > 
> > > If you'd rather I pickup the memory range handling again, I can do that. I
> > > think I'd likely change this function to be:
> > > 
> > >    void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu,
> > >       struct memory_notify *mhp);
> > > 
> > > where on a CPU op the 'cpu' parameter would be valid and 'mhp' NULL, and on a memory op,
> > > the 'mhp' would be valid and 'cpu' parameter invalid(0).
> > > 
> > > I'd likely then stuff these two parameters into struct kimage so that it can
> > > be utilized by arch-specific handler, if needed.
> > > 
> > > And of course, would print out the memory range for debug purposes.
> > > 
> > > Let me know what you think.
> > 
> 
> I do not plan on adding the memory range handling; I'll let Sourabh do that as he has a use case for it.
> 
> As such, I don't see any other request for changes.

OK, then I have no concern about this patchset. Thanks a lot for all
these effort, Eric.

Hi x86 maintainers,

Could you help check if there's anything we need improve, or consider
taking this patchset?

Thanks
Baoquan
  

Patch

diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index de62a722431e..e14345cc7a22 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -84,4 +84,13 @@  int parse_crashkernel_high(char *cmdline, unsigned long long system_ram,
 int parse_crashkernel_low(char *cmdline, unsigned long long system_ram,
 		unsigned long long *crash_size, unsigned long long *crash_base);
 
+#define KEXEC_CRASH_HP_NONE			0
+#define KEXEC_CRASH_HP_ADD_CPU			1
+#define KEXEC_CRASH_HP_REMOVE_CPU		2
+#define KEXEC_CRASH_HP_ADD_MEMORY		3
+#define KEXEC_CRASH_HP_REMOVE_MEMORY		4
+#define KEXEC_CRASH_HP_INVALID_CPU		-1U
+
+struct kimage;
+
 #endif /* LINUX_CRASH_CORE_H */
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 61443f8395f7..f9bd46fcbd5d 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -33,6 +33,7 @@  extern note_buf_t __percpu *crash_notes;
 #include <linux/compat.h>
 #include <linux/ioport.h>
 #include <linux/module.h>
+#include <linux/highmem.h>
 #include <asm/kexec.h>
 
 /* Verify architecture specific macros are defined */
@@ -368,6 +369,12 @@  struct kimage {
 	struct purgatory_info purgatory_info;
 #endif
 
+#ifdef CONFIG_CRASH_HOTPLUG
+	int hp_action;
+	int elfcorehdr_index;
+	bool elfcorehdr_updated;
+#endif
+
 #ifdef CONFIG_IMA_KEXEC
 	/* Virtual address of IMA measurement buffer for kexec syscall */
 	void *ima_buffer;
@@ -498,6 +505,10 @@  static inline int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, g
 static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { }
 #endif
 
+#ifndef arch_crash_handle_hotplug_event
+static inline void arch_crash_handle_hotplug_event(struct kimage *image) { }
+#endif
+
 #else /* !CONFIG_KEXEC_CORE */
 struct pt_regs;
 struct task_struct;
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 45d241aebe3d..0d87b796e5cf 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -11,6 +11,8 @@ 
 #include <linux/vmalloc.h>
 #include <linux/sizes.h>
 #include <linux/kexec.h>
+#include <linux/memory.h>
+#include <linux/cpuhotplug.h>
 
 #include <asm/page.h>
 #include <asm/sections.h>
@@ -18,6 +20,7 @@ 
 #include <crypto/sha1.h>
 
 #include "kallsyms_internal.h"
+#include "kexec_internal.h"
 
 /* vmcoreinfo stuff */
 unsigned char *vmcoreinfo_data;
@@ -697,3 +700,142 @@  static int __init crash_save_vmcoreinfo_init(void)
 }
 
 subsys_initcall(crash_save_vmcoreinfo_init);
+
+#ifdef CONFIG_CRASH_HOTPLUG
+#undef pr_fmt
+#define pr_fmt(fmt) "crash hp: " fmt
+/*
+ * To accurately reflect hot un/plug changes of cpu and memory resources
+ * (including onling and offlining of those resources), the elfcorehdr
+ * (which is passed to the crash kernel via the elfcorehdr= parameter)
+ * must be updated with the new list of CPUs and memories.
+ *
+ * In order to make changes to elfcorehdr, two conditions are needed:
+ * First, the segment containing the elfcorehdr must be large enough
+ * to permit a growing number of resources; the elfcorehdr memory size
+ * is based on NR_CPUS_DEFAULT and CRASH_MAX_MEMORY_RANGES.
+ * Second, purgatory must explicitly exclude the elfcorehdr from the
+ * list of segments it checks (since the elfcorehdr changes and thus
+ * would require an update to purgatory itself to update the digest).
+ */
+static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
+{
+	struct kimage *image;
+
+	/* Obtain lock while changing crash information */
+	if (!kexec_trylock()) {
+		pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n");
+		return;
+	}
+
+	/* Check kdump is not loaded */
+	if (!kexec_crash_image)
+		goto out;
+
+	image = kexec_crash_image;
+
+	if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
+		hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
+		pr_debug("hp_action %u, cpu %u\n", hp_action, cpu);
+	else
+		pr_debug("hp_action %u\n", hp_action);
+
+	/*
+	 * When the struct kimage is allocated, the elfcorehdr_index
+	 * is set to -1. Find the segment containing the elfcorehdr,
+	 * if not already found. This works for both the kexec_load
+	 * and kexec_file_load paths.
+	 */
+	if (image->elfcorehdr_index < 0) {
+		unsigned long mem;
+		unsigned char *ptr;
+		unsigned int n;
+
+		for (n = 0; n < image->nr_segments; n++) {
+			mem = image->segment[n].mem;
+			ptr = kmap_local_page(pfn_to_page(mem >> PAGE_SHIFT));
+			if (ptr) {
+				/* The segment containing elfcorehdr */
+				if (memcmp(ptr, ELFMAG, SELFMAG) == 0)
+					image->elfcorehdr_index = (int)n;
+				kunmap_local(ptr);
+			}
+		}
+	}
+
+	if (image->elfcorehdr_index < 0) {
+		pr_err("unable to locate elfcorehdr segment");
+		goto out;
+	}
+
+	/* Needed in order for the segments to be updated */
+	arch_kexec_unprotect_crashkres();
+
+	/* Differentiate between normal load and hotplug update */
+	image->hp_action = hp_action;
+
+	/* Now invoke arch-specific update handler */
+	arch_crash_handle_hotplug_event(image);
+
+	/* No longer handling a hotplug event */
+	image->hp_action = KEXEC_CRASH_HP_NONE;
+	image->elfcorehdr_updated = true;
+
+	/* Change back to read-only */
+	arch_kexec_protect_crashkres();
+
+out:
+	/* Release lock now that update complete */
+	kexec_unlock();
+}
+
+static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v)
+{
+	switch (val) {
+	case MEM_ONLINE:
+		crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY,
+			KEXEC_CRASH_HP_INVALID_CPU);
+		break;
+
+	case MEM_OFFLINE:
+		crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY,
+			KEXEC_CRASH_HP_INVALID_CPU);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block crash_memhp_nb = {
+	.notifier_call = crash_memhp_notifier,
+	.priority = 0
+};
+
+static int crash_cpuhp_online(unsigned int cpu)
+{
+	crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_CPU, cpu);
+	return 0;
+}
+
+static int crash_cpuhp_offline(unsigned int cpu)
+{
+	crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_CPU, cpu);
+	return 0;
+}
+
+static int __init crash_hotplug_init(void)
+{
+	int result = 0;
+
+	if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
+		register_memory_notifier(&crash_memhp_nb);
+
+	if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
+		result = cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN,
+			"crash/cpuhp", crash_cpuhp_online, crash_cpuhp_offline);
+	}
+
+	return result;
+}
+
+subsys_initcall(crash_hotplug_init);
+#endif
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 3d578c6fefee..8296d019737c 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -277,6 +277,12 @@  struct kimage *do_kimage_alloc_init(void)
 	/* Initialize the list of unusable pages */
 	INIT_LIST_HEAD(&image->unusable_pages);
 
+#ifdef CONFIG_CRASH_HOTPLUG
+	image->hp_action = KEXEC_CRASH_HP_NONE;
+	image->elfcorehdr_index = -1;
+	image->elfcorehdr_updated = false;
+#endif
+
 	return image;
 }