[v4,2/2] arm64: kdump: Support crashkernel=X fall back to reserve region above DMA zones

Message ID 20221116121044.1690-3-thunder.leizhen@huawei.com
State New
Headers
Series arm64: kdump: Function supplement and performance optimization |

Commit Message

Zhen Lei Nov. 16, 2022, 12:10 p.m. UTC
  For crashkernel=X without '@offset', select a region within DMA zones
first, and fall back to reserve region above DMA zones. This allows
users to use the same configuration on multiple platforms.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Acked-by: Baoquan He <bhe@redhat.com>
---
 Documentation/admin-guide/kernel-parameters.txt |  2 +-
 arch/arm64/mm/init.c                            | 17 ++++++++++++++++-
 2 files changed, 17 insertions(+), 2 deletions(-)
  

Comments

Catalin Marinas Nov. 16, 2022, 3:10 p.m. UTC | #1
On Wed, Nov 16, 2022 at 08:10:44PM +0800, Zhen Lei wrote:
> For crashkernel=X without '@offset', select a region within DMA zones
> first, and fall back to reserve region above DMA zones. This allows
> users to use the same configuration on multiple platforms.
> 
> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
> Acked-by: Baoquan He <bhe@redhat.com>

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
  
Baoquan He Nov. 21, 2022, 8:31 a.m. UTC | #2
On 11/16/22 at 08:10pm, Zhen Lei wrote:
> For crashkernel=X without '@offset', select a region within DMA zones
> first, and fall back to reserve region above DMA zones. This allows
> users to use the same configuration on multiple platforms.
> 
> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
> Acked-by: Baoquan He <bhe@redhat.com>
> ---
>  Documentation/admin-guide/kernel-parameters.txt |  2 +-
>  arch/arm64/mm/init.c                            | 17 ++++++++++++++++-
>  2 files changed, 17 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index a7b7147447b8bf8..ef6d922ed26b9dc 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -842,7 +842,7 @@
>  			memory region [offset, offset + size] for that kernel
>  			image. If '@offset' is omitted, then a suitable offset
>  			is selected automatically.
> -			[KNL, X86-64] Select a region under 4G first, and
> +			[KNL, X86-64, ARM64] Select a region under 4G first, and
>  			fall back to reserve region above 4G when '@offset'
>  			hasn't been specified.
>  			See Documentation/admin-guide/kdump/kdump.rst for further details.
> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index ba7227179822d10..58a0bb2c17f18cf 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -132,6 +132,7 @@ static void __init reserve_crashkernel(void)
>  	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
>  	char *cmdline = boot_command_line;
>  	int ret;
> +	bool fixed_base = false;
>  
>  	if (!IS_ENABLED(CONFIG_KEXEC_CORE))
>  		return;
> @@ -163,12 +164,26 @@ static void __init reserve_crashkernel(void)
>  	crash_size = PAGE_ALIGN(crash_size);
>  
>  	/* User specifies base address explicitly. */
> -	if (crash_base)
> +	if (crash_base) {
> +		fixed_base = true;
>  		crash_max = crash_base + crash_size;
> +	}
>  
> +retry:
>  	crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
>  					       crash_base, crash_max);

This patchset looks good to me. However, we observed an unexpected case
where a reserved region crosses the boundary between the low and high
memory regions. I noticed Catalin has pointed that out. Even with the
suggested code, with kernel parameters like: crashkernel=512M,high
crashkernel=128M,low, we finally have [4G-126M, 4G+386M], [1G, 1G+128M]
regions in the running kernel. This looks strange.

I am wondering if we can specify an explicit search_base in
memblock_phys_alloc_range() to avoid the above case. That way, the
crashkernel,high region will only exist in high memory, and the
crashkernel,low region will only exist in low memory. I made a draft
patch based on this patchset to present what the code looks like.

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 58a0bb2c17f1..fd9d35e17a62 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -127,7 +127,7 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
  */
 static void __init reserve_crashkernel(void)
 {
-	unsigned long long crash_base, crash_size;
+	unsigned long long crash_base, crash_size, search_base;
 	unsigned long long crash_low_size = 0;
 	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
 	char *cmdline = boot_command_line;
@@ -155,6 +155,7 @@ static void __init reserve_crashkernel(void)
 		else if (ret)
 			return;
 
+		search_base = CRASH_ADDR_LOW_MAX;
 		crash_max = CRASH_ADDR_HIGH_MAX;
 	} else if (ret || !crash_size) {
 		/* The specified value is invalid */
@@ -166,12 +167,13 @@ static void __init reserve_crashkernel(void)
 	/* User specifies base address explicitly. */
 	if (crash_base) {
 		fixed_base = true;
+		search_base = crash_base;
 		crash_max = crash_base + crash_size;
 	}
 
 retry:
 	crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
-					       crash_base, crash_max);
+					       search_base, crash_max);
 	if (!crash_base) {
 		/*
 		 * If the first attempt was for low memory, fall back to
@@ -180,6 +182,7 @@ static void __init reserve_crashkernel(void)
 		 */
 		if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
 			crash_max = CRASH_ADDR_HIGH_MAX;
+			search_base = CRASH_ADDR_LOW_MAX;
 			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
 			goto retry;
 		}
@@ -189,7 +192,7 @@ static void __init reserve_crashkernel(void)
 		return;
 	}
 
-	if ((crash_base > CRASH_ADDR_LOW_MAX - crash_low_size) &&
+	if ((crash_base > CRASH_ADDR_LOW_MAX) &&
 	     crash_low_size && reserve_crashkernel_low(crash_low_size)) {
 		memblock_phys_free(crash_base, crash_size);
 		return;
  
Zhen Lei Nov. 21, 2022, noon UTC | #3
On 2022/11/21 16:31, Baoquan He wrote:
> On 11/16/22 at 08:10pm, Zhen Lei wrote:
>> For crashkernel=X without '@offset', select a region within DMA zones
>> first, and fall back to reserve region above DMA zones. This allows
>> users to use the same configuration on multiple platforms.
>>
>> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
>> Acked-by: Baoquan He <bhe@redhat.com>
>> ---
>>  Documentation/admin-guide/kernel-parameters.txt |  2 +-
>>  arch/arm64/mm/init.c                            | 17 ++++++++++++++++-
>>  2 files changed, 17 insertions(+), 2 deletions(-)
>>
>> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
>> index a7b7147447b8bf8..ef6d922ed26b9dc 100644
>> --- a/Documentation/admin-guide/kernel-parameters.txt
>> +++ b/Documentation/admin-guide/kernel-parameters.txt
>> @@ -842,7 +842,7 @@
>>  			memory region [offset, offset + size] for that kernel
>>  			image. If '@offset' is omitted, then a suitable offset
>>  			is selected automatically.
>> -			[KNL, X86-64] Select a region under 4G first, and
>> +			[KNL, X86-64, ARM64] Select a region under 4G first, and
>>  			fall back to reserve region above 4G when '@offset'
>>  			hasn't been specified.
>>  			See Documentation/admin-guide/kdump/kdump.rst for further details.
>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
>> index ba7227179822d10..58a0bb2c17f18cf 100644
>> --- a/arch/arm64/mm/init.c
>> +++ b/arch/arm64/mm/init.c
>> @@ -132,6 +132,7 @@ static void __init reserve_crashkernel(void)
>>  	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
>>  	char *cmdline = boot_command_line;
>>  	int ret;
>> +	bool fixed_base = false;
>>  
>>  	if (!IS_ENABLED(CONFIG_KEXEC_CORE))
>>  		return;
>> @@ -163,12 +164,26 @@ static void __init reserve_crashkernel(void)
>>  	crash_size = PAGE_ALIGN(crash_size);
>>  
>>  	/* User specifies base address explicitly. */
>> -	if (crash_base)
>> +	if (crash_base) {
>> +		fixed_base = true;
>>  		crash_max = crash_base + crash_size;
>> +	}
>>  
>> +retry:
>>  	crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
>>  					       crash_base, crash_max);
> 
> This patchset looks good to me. However, we observed an unexpected case
> where a reserved region crosses the boundary between the low and high
> memory regions. I noticed Catalin has pointed that out. Even with the
> suggested code, with kernel parameters like: crashkernel=512M,high
> crashkernel=128M,low, we finally have [4G-126M, 4G+386M], [1G, 1G+128M]
> regions in the running kernel. This looks strange.

Is high-end memory fragmented? Add boot option memblock=debug and watch?

> 
> I am wondering if we can specify an explicit search_base in
> memblock_phys_alloc_range() to avoid the above case. That way, the
> crashkernel,high region will only exist in high memory, and the
> crashkernel,low region will only exist in low memory. I made a draft
> patch based on this patchset to present what the code looks like.

Looks good to me.

> 
> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index 58a0bb2c17f1..fd9d35e17a62 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -127,7 +127,7 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
>   */
>  static void __init reserve_crashkernel(void)
>  {
> -	unsigned long long crash_base, crash_size;
> +	unsigned long long crash_base, crash_size, search_base;

search_base needs to be initialized to 0.

>  	unsigned long long crash_low_size = 0;
>  	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
>  	char *cmdline = boot_command_line;
> @@ -155,6 +155,7 @@ static void __init reserve_crashkernel(void)
>  		else if (ret)
>  			return;
>  
> +		search_base = CRASH_ADDR_LOW_MAX;
>  		crash_max = CRASH_ADDR_HIGH_MAX;
>  	} else if (ret || !crash_size) {
>  		/* The specified value is invalid */
> @@ -166,12 +167,13 @@ static void __init reserve_crashkernel(void)
>  	/* User specifies base address explicitly. */
>  	if (crash_base) {
>  		fixed_base = true;
> +		search_base = crash_base;
>  		crash_max = crash_base + crash_size;
>  	}
>  
>  retry:
>  	crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
> -					       crash_base, crash_max);
> +					       search_base, crash_max);
>  	if (!crash_base) {
>  		/*
>  		 * If the first attempt was for low memory, fall back to
> @@ -180,6 +182,7 @@ static void __init reserve_crashkernel(void)
>  		 */
>  		if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
>  			crash_max = CRASH_ADDR_HIGH_MAX;
> +			search_base = CRASH_ADDR_LOW_MAX;
>  			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
>  			goto retry;
>  		}
> @@ -189,7 +192,7 @@ static void __init reserve_crashkernel(void)
>  		return;
>  	}
>  
> -	if ((crash_base > CRASH_ADDR_LOW_MAX - crash_low_size) &&
> +	if ((crash_base > CRASH_ADDR_LOW_MAX) &&
>  	     crash_low_size && reserve_crashkernel_low(crash_low_size)) {
>  		memblock_phys_free(crash_base, crash_size);
>  		return;
> 
> .
>
  

Patch

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a7b7147447b8bf8..ef6d922ed26b9dc 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -842,7 +842,7 @@ 
 			memory region [offset, offset + size] for that kernel
 			image. If '@offset' is omitted, then a suitable offset
 			is selected automatically.
-			[KNL, X86-64] Select a region under 4G first, and
+			[KNL, X86-64, ARM64] Select a region under 4G first, and
 			fall back to reserve region above 4G when '@offset'
 			hasn't been specified.
 			See Documentation/admin-guide/kdump/kdump.rst for further details.
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index ba7227179822d10..58a0bb2c17f18cf 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -132,6 +132,7 @@  static void __init reserve_crashkernel(void)
 	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
 	char *cmdline = boot_command_line;
 	int ret;
+	bool fixed_base = false;
 
 	if (!IS_ENABLED(CONFIG_KEXEC_CORE))
 		return;
@@ -163,12 +164,26 @@  static void __init reserve_crashkernel(void)
 	crash_size = PAGE_ALIGN(crash_size);
 
 	/* User specifies base address explicitly. */
-	if (crash_base)
+	if (crash_base) {
+		fixed_base = true;
 		crash_max = crash_base + crash_size;
+	}
 
+retry:
 	crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
 					       crash_base, crash_max);
 	if (!crash_base) {
+		/*
+		 * If the first attempt was for low memory, fall back to
+		 * high memory, the minimum required low memory will be
+		 * reserved later.
+		 */
+		if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
+			crash_max = CRASH_ADDR_HIGH_MAX;
+			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
+			goto retry;
+		}
+
 		pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
 			crash_size);
 		return;