[v14,3/7] crash: add generic infrastructure for crash hotplug support

Message ID 20221116214643.6384-4-eric.devolder@oracle.com
State New
Headers
Series crash: Kernel handling of CPU and memory hot un/plug |

Commit Message

Eric DeVolder Nov. 16, 2022, 9:46 p.m. UTC
  CPU and memory change notifications are received in order to
regenerate the elfcorehdr.

To support cpu hotplug, a callback is registered to capture the
CPUHP_AP_ONLINE_DYN online and offline events via
cpuhp_setup_state_nocalls().

To support memory hotplug, a notifier is registered to capture the
MEM_ONLINE and MEM_OFFLINE events via register_memory_notifier().

The cpu callback and memory notifiers call handle_hotplug_event()
which performs needed tasks and then dispatches the event to the
architecture specific arch_crash_handle_hotplug_event(). During the
process, the kexec_lock is held.

Signed-off-by: Eric DeVolder <eric.devolder@oracle.com>
---
 include/linux/crash_core.h |   8 +++
 include/linux/kexec.h      |  36 ++++++++++
 kernel/crash_core.c        | 139 +++++++++++++++++++++++++++++++++++++
 3 files changed, 183 insertions(+)
  

Comments

Baoquan He Nov. 25, 2022, 3:26 a.m. UTC | #1
On 11/16/22 at 04:46pm, Eric DeVolder wrote:
......
> diff --git a/include/linux/kexec.h b/include/linux/kexec.h
> index ebf46c3b8f8b..b4dbc21f9081 100644
> --- a/include/linux/kexec.h
> +++ b/include/linux/kexec.h
> @@ -32,6 +32,7 @@ extern note_buf_t __percpu *crash_notes;
>  #include <linux/compat.h>
>  #include <linux/ioport.h>
>  #include <linux/module.h>
> +#include <linux/highmem.h>
>  #include <asm/kexec.h>
>  
>  /* Verify architecture specific macros are defined */
> @@ -374,6 +375,13 @@ struct kimage {
>  	struct purgatory_info purgatory_info;
>  #endif
>  
> +#ifdef CONFIG_CRASH_HOTPLUG

This kernel config CRASH_HOTPLUG is added in patch 7, but we have used
it in the previous patch, not sure if this is acceptable.

> +	bool hotplug_event;
> +	unsigned int offlinecpu;
> +	bool elfcorehdr_index_valid;
> +	int elfcorehdr_index;
> +#endif
> +
>  #ifdef CONFIG_IMA_KEXEC
>  	/* Virtual address of IMA measurement buffer for kexec syscall */
>  	void *ima_buffer;
> @@ -503,6 +511,34 @@ static inline int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, g
>  static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { }
>  #endif
>  
> +#ifndef arch_map_crash_pages
> +/*
> + * NOTE: The addresses and sizes passed to this routine have
> + * already been fully aligned on page boundaries. There is no
> + * need for massaging the address or size.
> + */
> +static inline void *arch_map_crash_pages(unsigned long paddr,
> +					unsigned long size)
> +{
> +	if (size > 0)
> +		return kmap_local_page(pfn_to_page(paddr >> PAGE_SHIFT));
> +	else
> +		return NULL;
> +}
> +#endif
> +
> +#ifndef arch_unmap_crash_pages
> +static inline void arch_unmap_crash_pages(void *ptr)
> +{
> +	if (ptr)
> +		kunmap_local(ptr);
> +}
> +#endif
> +
> +#ifndef arch_crash_handle_hotplug_event
> +static inline void arch_crash_handle_hotplug_event(struct kimage *image) { }
> +#endif
> +
>  #else /* !CONFIG_KEXEC_CORE */
>  struct pt_regs;
>  struct task_struct;
> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
> index 8c648fd5897a..4e7221226976 100644
> --- a/kernel/crash_core.c
> +++ b/kernel/crash_core.c
> @@ -11,6 +11,8 @@
>  #include <linux/vmalloc.h>
>  #include <linux/sizes.h>
>  #include <linux/kexec.h>
> +#include <linux/memory.h>
> +#include <linux/cpuhotplug.h>
>  
>  #include <asm/page.h>
>  #include <asm/sections.h>
> @@ -18,6 +20,7 @@
>  #include <crypto/sha1.h>
>  
>  #include "kallsyms_internal.h"
> +#include "kexec_internal.h"
>  
>  /* vmcoreinfo stuff */
>  unsigned char *vmcoreinfo_data;
> @@ -612,3 +615,139 @@ static int __init crash_save_vmcoreinfo_init(void)
>  }
>  
>  subsys_initcall(crash_save_vmcoreinfo_init);
> +
> +#ifdef CONFIG_CRASH_HOTPLUG
> +#undef pr_fmt
> +#define pr_fmt(fmt) "crash hp: " fmt
> +/*
> + * To accurately reflect hot un/plug changes, the elfcorehdr (which
> + * is passed to the crash kernel via the elfcorehdr= parameter)
> + * must be updated with the new list of CPUs and memories.
> + *
> + * In order to make changes to elfcorehdr, two conditions are needed:
> + * First, the segment containing the elfcorehdr must be large enough
> + * to permit a growing number of resources; the elfcorehdr memory size
> + * is based on NR_CPUS_DEFAULT and CRASH_MAX_MEMORY_RANGES.
> + * Second, purgatory must explicitly exclude the elfcorehdr from the
> + * list of segments it checks (since the elfcorehdr changes and thus
> + * would require an update to purgatory itself to update the digest).
> + */
> +static void handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
> +{
> +	/* Obtain lock while changing crash information */
> +	if (kexec_trylock()) {
> +
> +		/* Check kdump is loaded */
> +		if (kexec_crash_image) {
> +			struct kimage *image = kexec_crash_image;
> +
> +			if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
> +				hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
> +				pr_debug("hp_action %u, cpu %u\n", hp_action, cpu);
> +			else
> +				pr_debug("hp_action %u\n", hp_action);
> +
> +			/*
> +			 * When the struct kimage is allocated, it is wiped to zero, so
> +			 * the elfcorehdr_index_valid defaults to false. Find the
> +			 * segment containing the elfcorehdr, if not already found.
> +			 * This works for both the kexec_load and kexec_file_load paths.
> +			 */
> +			if (!image->elfcorehdr_index_valid) {
> +				unsigned long mem, memsz;
> +				unsigned char *ptr;
> +				unsigned int n;
> +
> +				for (n = 0; n < image->nr_segments; n++) {
> +					mem = image->segment[n].mem;
> +					memsz = image->segment[n].memsz;
> +					ptr = arch_map_crash_pages(mem, memsz);
> +					if (ptr) {
> +						/* The segment containing elfcorehdr */
> +						if (memcmp(ptr, ELFMAG, SELFMAG) == 0) {
> +							image->elfcorehdr_index = (int)n;
> +							image->elfcorehdr_index_valid = true;
> +						}
> +					}
> +					arch_unmap_crash_pages((void *)ptr);
> +				}
> +			}
> +
> +			if (!image->elfcorehdr_index_valid) {
> +				pr_err("unable to locate elfcorehdr segment");
> +				goto out;
> +			}
> +
> +			/* Needed in order for the segments to be updated */
> +			arch_kexec_unprotect_crashkres();
> +
> +			/* Flag to differentiate between normal load and hotplug */
> +			image->hotplug_event = true;
> +
> +			/* Now invoke arch-specific update handler */
> +			arch_crash_handle_hotplug_event(image);
> +
> +			/* No longer handling a hotplug event */
> +			image->hotplug_event = false;
> +
> +			/* Change back to read-only */
> +			arch_kexec_protect_crashkres();
> +		}
> +
> +out:
> +		/* Release lock now that update complete */
> +		kexec_unlock();
> +	}
> +}
> +
> +static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v)
> +{
> +	switch (val) {
> +	case MEM_ONLINE:
> +		handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY,
> +			KEXEC_CRASH_HP_INVALID_CPU);
> +		break;
> +
> +	case MEM_OFFLINE:
> +		handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY,
> +			KEXEC_CRASH_HP_INVALID_CPU);
> +		break;
> +	}
> +	return NOTIFY_OK;
> +}
> +
> +static struct notifier_block crash_memhp_nb = {
> +	.notifier_call = crash_memhp_notifier,
> +	.priority = 0
> +};
> +
> +static int crash_cpuhp_online(unsigned int cpu)
> +{
> +	handle_hotplug_event(KEXEC_CRASH_HP_ADD_CPU, cpu);
> +	return 0;
> +}
> +
> +static int crash_cpuhp_offline(unsigned int cpu)
> +{
> +	handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_CPU, cpu);
> +	return 0;
> +}
> +
> +static int __init crash_hotplug_init(void)
> +{
> +	int result = 0;
> +
> +	if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
> +		register_memory_notifier(&crash_memhp_nb);
> +
> +	if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
> +		result = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
> +						   "crash/cpuhp",
> +						   crash_cpuhp_online,
> +						   crash_cpuhp_offline);
> +
> +	return result;
> +}
> +
> +subsys_initcall(crash_hotplug_init);
> +#endif
> -- 
> 2.31.1
>
  
Eric DeVolder Nov. 28, 2022, 3:46 p.m. UTC | #2
On 11/24/22 21:26, Baoquan He wrote:
> On 11/16/22 at 04:46pm, Eric DeVolder wrote:
> ......
>> diff --git a/include/linux/kexec.h b/include/linux/kexec.h
>> index ebf46c3b8f8b..b4dbc21f9081 100644
>> --- a/include/linux/kexec.h
>> +++ b/include/linux/kexec.h
>> @@ -32,6 +32,7 @@ extern note_buf_t __percpu *crash_notes;
>>   #include <linux/compat.h>
>>   #include <linux/ioport.h>
>>   #include <linux/module.h>
>> +#include <linux/highmem.h>
>>   #include <asm/kexec.h>
>>   
>>   /* Verify architecture specific macros are defined */
>> @@ -374,6 +375,13 @@ struct kimage {
>>   	struct purgatory_info purgatory_info;
>>   #endif
>>   
>> +#ifdef CONFIG_CRASH_HOTPLUG
> 
> This kernel config CRASH_HOTPLUG is added in patch 7, but we have used
> it in the previous patch, not sure if this is acceptable.
> 
I wasn't sure what to do here either. Patch 7 is the x86 arch-specific support patch, and 
CRASH_HOTPLUG is introduced in arch/x86/Kconfig. I did look at introducing CRASH_HOTPLUG as a 
generic/non-arch-specific option, but no location seemed appropriate given HOTPLUG_CPU is 
arch-specific and MEMORY_HOTPLUG is in mm/Kconfig.

This doesn't break bisect, but as you point out, not sure if the location in patch 7 is acceptable.
I'm not really sure how to resolve the question.

eric

>> +	bool hotplug_event;
>> +	unsigned int offlinecpu;
>> +	bool elfcorehdr_index_valid;
>> +	int elfcorehdr_index;
>> +#endif
>> +
>>   #ifdef CONFIG_IMA_KEXEC
>>   	/* Virtual address of IMA measurement buffer for kexec syscall */
>>   	void *ima_buffer;
>> @@ -503,6 +511,34 @@ static inline int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, g
>>   static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { }
>>   #endif
>>   
>> +#ifndef arch_map_crash_pages
>> +/*
>> + * NOTE: The addresses and sizes passed to this routine have
>> + * already been fully aligned on page boundaries. There is no
>> + * need for massaging the address or size.
>> + */
>> +static inline void *arch_map_crash_pages(unsigned long paddr,
>> +					unsigned long size)
>> +{
>> +	if (size > 0)
>> +		return kmap_local_page(pfn_to_page(paddr >> PAGE_SHIFT));
>> +	else
>> +		return NULL;
>> +}
>> +#endif
>> +
>> +#ifndef arch_unmap_crash_pages
>> +static inline void arch_unmap_crash_pages(void *ptr)
>> +{
>> +	if (ptr)
>> +		kunmap_local(ptr);
>> +}
>> +#endif
>> +
>> +#ifndef arch_crash_handle_hotplug_event
>> +static inline void arch_crash_handle_hotplug_event(struct kimage *image) { }
>> +#endif
>> +
>>   #else /* !CONFIG_KEXEC_CORE */
>>   struct pt_regs;
>>   struct task_struct;
>> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
>> index 8c648fd5897a..4e7221226976 100644
>> --- a/kernel/crash_core.c
>> +++ b/kernel/crash_core.c
>> @@ -11,6 +11,8 @@
>>   #include <linux/vmalloc.h>
>>   #include <linux/sizes.h>
>>   #include <linux/kexec.h>
>> +#include <linux/memory.h>
>> +#include <linux/cpuhotplug.h>
>>   
>>   #include <asm/page.h>
>>   #include <asm/sections.h>
>> @@ -18,6 +20,7 @@
>>   #include <crypto/sha1.h>
>>   
>>   #include "kallsyms_internal.h"
>> +#include "kexec_internal.h"
>>   
>>   /* vmcoreinfo stuff */
>>   unsigned char *vmcoreinfo_data;
>> @@ -612,3 +615,139 @@ static int __init crash_save_vmcoreinfo_init(void)
>>   }
>>   
>>   subsys_initcall(crash_save_vmcoreinfo_init);
>> +
>> +#ifdef CONFIG_CRASH_HOTPLUG
>> +#undef pr_fmt
>> +#define pr_fmt(fmt) "crash hp: " fmt
>> +/*
>> + * To accurately reflect hot un/plug changes, the elfcorehdr (which
>> + * is passed to the crash kernel via the elfcorehdr= parameter)
>> + * must be updated with the new list of CPUs and memories.
>> + *
>> + * In order to make changes to elfcorehdr, two conditions are needed:
>> + * First, the segment containing the elfcorehdr must be large enough
>> + * to permit a growing number of resources; the elfcorehdr memory size
>> + * is based on NR_CPUS_DEFAULT and CRASH_MAX_MEMORY_RANGES.
>> + * Second, purgatory must explicitly exclude the elfcorehdr from the
>> + * list of segments it checks (since the elfcorehdr changes and thus
>> + * would require an update to purgatory itself to update the digest).
>> + */
>> +static void handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
>> +{
>> +	/* Obtain lock while changing crash information */
>> +	if (kexec_trylock()) {
>> +
>> +		/* Check kdump is loaded */
>> +		if (kexec_crash_image) {
>> +			struct kimage *image = kexec_crash_image;
>> +
>> +			if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
>> +				hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
>> +				pr_debug("hp_action %u, cpu %u\n", hp_action, cpu);
>> +			else
>> +				pr_debug("hp_action %u\n", hp_action);
>> +
>> +			/*
>> +			 * When the struct kimage is allocated, it is wiped to zero, so
>> +			 * the elfcorehdr_index_valid defaults to false. Find the
>> +			 * segment containing the elfcorehdr, if not already found.
>> +			 * This works for both the kexec_load and kexec_file_load paths.
>> +			 */
>> +			if (!image->elfcorehdr_index_valid) {
>> +				unsigned long mem, memsz;
>> +				unsigned char *ptr;
>> +				unsigned int n;
>> +
>> +				for (n = 0; n < image->nr_segments; n++) {
>> +					mem = image->segment[n].mem;
>> +					memsz = image->segment[n].memsz;
>> +					ptr = arch_map_crash_pages(mem, memsz);
>> +					if (ptr) {
>> +						/* The segment containing elfcorehdr */
>> +						if (memcmp(ptr, ELFMAG, SELFMAG) == 0) {
>> +							image->elfcorehdr_index = (int)n;
>> +							image->elfcorehdr_index_valid = true;
>> +						}
>> +					}
>> +					arch_unmap_crash_pages((void *)ptr);
>> +				}
>> +			}
>> +
>> +			if (!image->elfcorehdr_index_valid) {
>> +				pr_err("unable to locate elfcorehdr segment");
>> +				goto out;
>> +			}
>> +
>> +			/* Needed in order for the segments to be updated */
>> +			arch_kexec_unprotect_crashkres();
>> +
>> +			/* Flag to differentiate between normal load and hotplug */
>> +			image->hotplug_event = true;
>> +
>> +			/* Now invoke arch-specific update handler */
>> +			arch_crash_handle_hotplug_event(image);
>> +
>> +			/* No longer handling a hotplug event */
>> +			image->hotplug_event = false;
>> +
>> +			/* Change back to read-only */
>> +			arch_kexec_protect_crashkres();
>> +		}
>> +
>> +out:
>> +		/* Release lock now that update complete */
>> +		kexec_unlock();
>> +	}
>> +}
>> +
>> +static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v)
>> +{
>> +	switch (val) {
>> +	case MEM_ONLINE:
>> +		handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY,
>> +			KEXEC_CRASH_HP_INVALID_CPU);
>> +		break;
>> +
>> +	case MEM_OFFLINE:
>> +		handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY,
>> +			KEXEC_CRASH_HP_INVALID_CPU);
>> +		break;
>> +	}
>> +	return NOTIFY_OK;
>> +}
>> +
>> +static struct notifier_block crash_memhp_nb = {
>> +	.notifier_call = crash_memhp_notifier,
>> +	.priority = 0
>> +};
>> +
>> +static int crash_cpuhp_online(unsigned int cpu)
>> +{
>> +	handle_hotplug_event(KEXEC_CRASH_HP_ADD_CPU, cpu);
>> +	return 0;
>> +}
>> +
>> +static int crash_cpuhp_offline(unsigned int cpu)
>> +{
>> +	handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_CPU, cpu);
>> +	return 0;
>> +}
>> +
>> +static int __init crash_hotplug_init(void)
>> +{
>> +	int result = 0;
>> +
>> +	if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
>> +		register_memory_notifier(&crash_memhp_nb);
>> +
>> +	if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
>> +		result = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
>> +						   "crash/cpuhp",
>> +						   crash_cpuhp_online,
>> +						   crash_cpuhp_offline);
>> +
>> +	return result;
>> +}
>> +
>> +subsys_initcall(crash_hotplug_init);
>> +#endif
>> -- 
>> 2.31.1
>>
>
  
Baoquan He Nov. 29, 2022, 12:43 a.m. UTC | #3
On 11/28/22 at 09:46am, Eric DeVolder wrote:
> 
> 
> On 11/24/22 21:26, Baoquan He wrote:
> > On 11/16/22 at 04:46pm, Eric DeVolder wrote:
> > ......
> > > diff --git a/include/linux/kexec.h b/include/linux/kexec.h
> > > index ebf46c3b8f8b..b4dbc21f9081 100644
> > > --- a/include/linux/kexec.h
> > > +++ b/include/linux/kexec.h
> > > @@ -32,6 +32,7 @@ extern note_buf_t __percpu *crash_notes;
> > >   #include <linux/compat.h>
> > >   #include <linux/ioport.h>
> > >   #include <linux/module.h>
> > > +#include <linux/highmem.h>
> > >   #include <asm/kexec.h>
> > >   /* Verify architecture specific macros are defined */
> > > @@ -374,6 +375,13 @@ struct kimage {
> > >   	struct purgatory_info purgatory_info;
> > >   #endif
> > > +#ifdef CONFIG_CRASH_HOTPLUG
> > 
> > This kernel config CRASH_HOTPLUG is added in patch 7, but we have used
> > it in the previous patch, not sure if this is acceptable.
> > 
> I wasn't sure what to do here either. Patch 7 is the x86 arch-specific
> support patch, and CRASH_HOTPLUG is introduced in arch/x86/Kconfig. I did
> look at introducing CRASH_HOTPLUG as a generic/non-arch-specific option, but
> no location seemed appropriate given HOTPLUG_CPU is arch-specific and
> MEMORY_HOTPLUG is in mm/Kconfig.

arch/Kconfig?

Because CRASH_CORE/KEXEC_CORE are defined there.

> 
> This doesn't break bisect, but as you point out, not sure if the location in patch 7 is acceptable.
> I'm not really sure how to resolve the question.

Hmm, since it's bisect-able, seems doesn't break rule. I could be too
sensitive. Do we have a precendent like this, to strengthen our
confidence?

If no concern from other people, it's also fine to me.
  
Borislav Petkov Dec. 7, 2022, 10 a.m. UTC | #4
On Fri, Nov 25, 2022 at 11:26:53AM +0800, Baoquan He wrote:
> This kernel config CRASH_HOTPLUG is added in patch 7, but we have used
> it in the previous patch, not sure if this is acceptable.

Why would it not be acceptable?
  
Borislav Petkov Dec. 7, 2022, 10:15 a.m. UTC | #5
On Wed, Nov 16, 2022 at 04:46:39PM -0500, Eric DeVolder wrote:
> +#ifndef arch_map_crash_pages
> +/*
> + * NOTE: The addresses and sizes passed to this routine have
> + * already been fully aligned on page boundaries. There is no
> + * need for massaging the address or size.
> + */
> +static inline void *arch_map_crash_pages(unsigned long paddr,
> +					unsigned long size)
> +{
> +	if (size > 0)
> +		return kmap_local_page(pfn_to_page(paddr >> PAGE_SHIFT));
> +	else
> +		return NULL;
> +}
> +#endif
> +
> +#ifndef arch_unmap_crash_pages
> +static inline void arch_unmap_crash_pages(void *ptr)
> +{
> +	if (ptr)
> +		kunmap_local(ptr);
> +}
> +#endif

Why is that function still here and why aren't you calling
kmap_local_page() simply?
  
Baoquan He Dec. 7, 2022, 12:36 p.m. UTC | #6
On 12/07/22 at 11:00am, Borislav Petkov wrote:
> On Fri, Nov 25, 2022 at 11:26:53AM +0800, Baoquan He wrote:
> > This kernel config CRASH_HOTPLUG is added in patch 7, but we have used
> > it in the previous patch, not sure if this is acceptable.
> 
> Why would it not be acceptable?

Below is my last reply to Eric about my thinking on this. That would be
great if it's a normal situation when adding Kconfig item, I am happy to
learn this if it's confirmed normal.

=====
Hmm, since it's bisect-able, seems doesn't break rule. I could be too
sensitive. Do we have a precendent like this, to strengthen our
confidence?

If no concern from other people, it's also fine to me.
=====

Thanks
Baoquan
  
Borislav Petkov Dec. 7, 2022, 12:42 p.m. UTC | #7
On Wed, Dec 07, 2022 at 08:36:13PM +0800, Baoquan He wrote:
> Below is my last reply to Eric about my thinking on this.

Yes, I saw that.

So think about it: if a CONFIG_ item is not present, what does that mean
for the code which is enclosed around it?
  
Baoquan He Dec. 7, 2022, 1:57 p.m. UTC | #8
On 12/07/22 at 01:42pm, Borislav Petkov wrote:
> On Wed, Dec 07, 2022 at 08:36:13PM +0800, Baoquan He wrote:
> > Below is my last reply to Eric about my thinking on this.
> 
> Yes, I saw that.
> 
> So think about it: if a CONFIG_ item is not present, what does that mean
> for the code which is enclosed around it?

Ignored by compiler.

I thought we usually need to introduce the kernel config option, then
add code related to it, so that is a wrong idea. It would be helpful to
tell this somewhere in document.
  
Borislav Petkov Dec. 7, 2022, 3:56 p.m. UTC | #9
On Wed, Dec 07, 2022 at 09:57:48PM +0800, Baoquan He wrote:
> I thought we usually need to introduce the kernel config option, then
> add code related to it, so that is a wrong idea.

It depends: sometimes it is prudent to add the code behind an ifdeffery
first but have it not being buildable so that you don't have to deal
with build breakages but rather concentrate on adding the facilities
first.

And you add the Kconfig item only in the end where everything is in
place and it should build properly then.

> It would be helpful to tell this somewhere in document.

Feel free. I mean, it is pretty obvious but if it helps, it wouldn't hurt.
  
Baoquan He Dec. 8, 2022, 4:03 a.m. UTC | #10
On 12/07/22 at 04:56pm, Borislav Petkov wrote:
> On Wed, Dec 07, 2022 at 09:57:48PM +0800, Baoquan He wrote:
> > I thought we usually need to introduce the kernel config option, then
> > add code related to it, so that is a wrong idea.
> 
> It depends: sometimes it is prudent to add the code behind an ifdeffery
> first but have it not being buildable so that you don't have to deal
> with build breakages but rather concentrate on adding the facilities
> first.
> 
> And you add the Kconfig item only in the end where everything is in
> place and it should build properly then.

I see. Now it's pretty clear to us. Thanks a lot.

> 
> > It would be helpful to tell this somewhere in document.
> 
> Feel free. I mean, it is pretty obvious but if it helps, it wouldn't hurt.

OK, at least people tracking this thread got this now.
  
Eric DeVolder Dec. 8, 2022, 7:04 p.m. UTC | #11
On 12/7/22 04:15, Borislav Petkov wrote:
> On Wed, Nov 16, 2022 at 04:46:39PM -0500, Eric DeVolder wrote:
>> +#ifndef arch_map_crash_pages
>> +/*
>> + * NOTE: The addresses and sizes passed to this routine have
>> + * already been fully aligned on page boundaries. There is no
>> + * need for massaging the address or size.
>> + */
>> +static inline void *arch_map_crash_pages(unsigned long paddr,
>> +					unsigned long size)
>> +{
>> +	if (size > 0)
>> +		return kmap_local_page(pfn_to_page(paddr >> PAGE_SHIFT));
>> +	else
>> +		return NULL;
>> +}
>> +#endif
>> +
>> +#ifndef arch_unmap_crash_pages
>> +static inline void arch_unmap_crash_pages(void *ptr)
>> +{
>> +	if (ptr)
>> +		kunmap_local(ptr);
>> +}
>> +#endif
> 
> Why is that function still here and why aren't you calling
> kmap_local_page() simply?
> 

Corrected!
eric
  

Patch

diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index de62a722431e..a270f8660538 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -84,4 +84,12 @@  int parse_crashkernel_high(char *cmdline, unsigned long long system_ram,
 int parse_crashkernel_low(char *cmdline, unsigned long long system_ram,
 		unsigned long long *crash_size, unsigned long long *crash_base);
 
+#define KEXEC_CRASH_HP_REMOVE_CPU		0
+#define KEXEC_CRASH_HP_ADD_CPU			1
+#define KEXEC_CRASH_HP_REMOVE_MEMORY		2
+#define KEXEC_CRASH_HP_ADD_MEMORY		3
+#define KEXEC_CRASH_HP_INVALID_CPU		-1U
+
+struct kimage;
+
 #endif /* LINUX_CRASH_CORE_H */
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index ebf46c3b8f8b..b4dbc21f9081 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -32,6 +32,7 @@  extern note_buf_t __percpu *crash_notes;
 #include <linux/compat.h>
 #include <linux/ioport.h>
 #include <linux/module.h>
+#include <linux/highmem.h>
 #include <asm/kexec.h>
 
 /* Verify architecture specific macros are defined */
@@ -374,6 +375,13 @@  struct kimage {
 	struct purgatory_info purgatory_info;
 #endif
 
+#ifdef CONFIG_CRASH_HOTPLUG
+	bool hotplug_event;
+	unsigned int offlinecpu;
+	bool elfcorehdr_index_valid;
+	int elfcorehdr_index;
+#endif
+
 #ifdef CONFIG_IMA_KEXEC
 	/* Virtual address of IMA measurement buffer for kexec syscall */
 	void *ima_buffer;
@@ -503,6 +511,34 @@  static inline int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, g
 static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { }
 #endif
 
+#ifndef arch_map_crash_pages
+/*
+ * NOTE: The addresses and sizes passed to this routine have
+ * already been fully aligned on page boundaries. There is no
+ * need for massaging the address or size.
+ */
+static inline void *arch_map_crash_pages(unsigned long paddr,
+					unsigned long size)
+{
+	if (size > 0)
+		return kmap_local_page(pfn_to_page(paddr >> PAGE_SHIFT));
+	else
+		return NULL;
+}
+#endif
+
+#ifndef arch_unmap_crash_pages
+static inline void arch_unmap_crash_pages(void *ptr)
+{
+	if (ptr)
+		kunmap_local(ptr);
+}
+#endif
+
+#ifndef arch_crash_handle_hotplug_event
+static inline void arch_crash_handle_hotplug_event(struct kimage *image) { }
+#endif
+
 #else /* !CONFIG_KEXEC_CORE */
 struct pt_regs;
 struct task_struct;
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 8c648fd5897a..4e7221226976 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -11,6 +11,8 @@ 
 #include <linux/vmalloc.h>
 #include <linux/sizes.h>
 #include <linux/kexec.h>
+#include <linux/memory.h>
+#include <linux/cpuhotplug.h>
 
 #include <asm/page.h>
 #include <asm/sections.h>
@@ -18,6 +20,7 @@ 
 #include <crypto/sha1.h>
 
 #include "kallsyms_internal.h"
+#include "kexec_internal.h"
 
 /* vmcoreinfo stuff */
 unsigned char *vmcoreinfo_data;
@@ -612,3 +615,139 @@  static int __init crash_save_vmcoreinfo_init(void)
 }
 
 subsys_initcall(crash_save_vmcoreinfo_init);
+
+#ifdef CONFIG_CRASH_HOTPLUG
+#undef pr_fmt
+#define pr_fmt(fmt) "crash hp: " fmt
+/*
+ * To accurately reflect hot un/plug changes, the elfcorehdr (which
+ * is passed to the crash kernel via the elfcorehdr= parameter)
+ * must be updated with the new list of CPUs and memories.
+ *
+ * In order to make changes to elfcorehdr, two conditions are needed:
+ * First, the segment containing the elfcorehdr must be large enough
+ * to permit a growing number of resources; the elfcorehdr memory size
+ * is based on NR_CPUS_DEFAULT and CRASH_MAX_MEMORY_RANGES.
+ * Second, purgatory must explicitly exclude the elfcorehdr from the
+ * list of segments it checks (since the elfcorehdr changes and thus
+ * would require an update to purgatory itself to update the digest).
+ */
+static void handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
+{
+	/* Obtain lock while changing crash information */
+	if (kexec_trylock()) {
+
+		/* Check kdump is loaded */
+		if (kexec_crash_image) {
+			struct kimage *image = kexec_crash_image;
+
+			if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
+				hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
+				pr_debug("hp_action %u, cpu %u\n", hp_action, cpu);
+			else
+				pr_debug("hp_action %u\n", hp_action);
+
+			/*
+			 * When the struct kimage is allocated, it is wiped to zero, so
+			 * the elfcorehdr_index_valid defaults to false. Find the
+			 * segment containing the elfcorehdr, if not already found.
+			 * This works for both the kexec_load and kexec_file_load paths.
+			 */
+			if (!image->elfcorehdr_index_valid) {
+				unsigned long mem, memsz;
+				unsigned char *ptr;
+				unsigned int n;
+
+				for (n = 0; n < image->nr_segments; n++) {
+					mem = image->segment[n].mem;
+					memsz = image->segment[n].memsz;
+					ptr = arch_map_crash_pages(mem, memsz);
+					if (ptr) {
+						/* The segment containing elfcorehdr */
+						if (memcmp(ptr, ELFMAG, SELFMAG) == 0) {
+							image->elfcorehdr_index = (int)n;
+							image->elfcorehdr_index_valid = true;
+						}
+					}
+					arch_unmap_crash_pages((void *)ptr);
+				}
+			}
+
+			if (!image->elfcorehdr_index_valid) {
+				pr_err("unable to locate elfcorehdr segment");
+				goto out;
+			}
+
+			/* Needed in order for the segments to be updated */
+			arch_kexec_unprotect_crashkres();
+
+			/* Flag to differentiate between normal load and hotplug */
+			image->hotplug_event = true;
+
+			/* Now invoke arch-specific update handler */
+			arch_crash_handle_hotplug_event(image);
+
+			/* No longer handling a hotplug event */
+			image->hotplug_event = false;
+
+			/* Change back to read-only */
+			arch_kexec_protect_crashkres();
+		}
+
+out:
+		/* Release lock now that update complete */
+		kexec_unlock();
+	}
+}
+
+static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v)
+{
+	switch (val) {
+	case MEM_ONLINE:
+		handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY,
+			KEXEC_CRASH_HP_INVALID_CPU);
+		break;
+
+	case MEM_OFFLINE:
+		handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY,
+			KEXEC_CRASH_HP_INVALID_CPU);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block crash_memhp_nb = {
+	.notifier_call = crash_memhp_notifier,
+	.priority = 0
+};
+
+static int crash_cpuhp_online(unsigned int cpu)
+{
+	handle_hotplug_event(KEXEC_CRASH_HP_ADD_CPU, cpu);
+	return 0;
+}
+
+static int crash_cpuhp_offline(unsigned int cpu)
+{
+	handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_CPU, cpu);
+	return 0;
+}
+
+static int __init crash_hotplug_init(void)
+{
+	int result = 0;
+
+	if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
+		register_memory_notifier(&crash_memhp_nb);
+
+	if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
+		result = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+						   "crash/cpuhp",
+						   crash_cpuhp_online,
+						   crash_cpuhp_offline);
+
+	return result;
+}
+
+subsys_initcall(crash_hotplug_init);
+#endif