[v3,3/3] kexec: Introduce parameters load_limit_reboot and load_limit_panic

Message ID 20221114-disable-kexec-reset-v3-3-4ef4e929adf6@chromium.org
State New
Headers
Series kexec: Add new parameter to limit the access to kexec |

Commit Message

Ricardo Ribalda Dec. 20, 2022, 10:05 p.m. UTC
  Add two parameter to specify how many times a kexec kernel can be loaded.

The sysadmin can set different limits for kexec panic and kexec reboot
kernels.

The value can be modified at runtime via sysfs, but only with a value
smaller than the current one (except -1).

Signed-off-by: Ricardo Ribalda <ribalda@chromium.org>
---
 Documentation/admin-guide/kernel-parameters.txt | 14 ++++
 include/linux/kexec.h                           |  2 +-
 kernel/kexec.c                                  |  2 +-
 kernel/kexec_core.c                             | 91 ++++++++++++++++++++++++-
 kernel/kexec_file.c                             |  2 +-
 5 files changed, 106 insertions(+), 5 deletions(-)
  

Comments

Steven Rostedt Dec. 21, 2022, 12:22 a.m. UTC | #1
On Tue, 20 Dec 2022 23:05:45 +0100
Ricardo Ribalda <ribalda@chromium.org> wrote:

I hate to be the grammar police, but..

> Add two parameter to specify how many times a kexec kernel can be loaded.

   "parameters"

> 
> The sysadmin can set different limits for kexec panic and kexec reboot
> kernels.
> 
> The value can be modified at runtime via sysfs, but only with a value
> smaller than the current one (except -1).
> 
> Signed-off-by: Ricardo Ribalda <ribalda@chromium.org>
> ---
>  Documentation/admin-guide/kernel-parameters.txt | 14 ++++
>  include/linux/kexec.h                           |  2 +-
>  kernel/kexec.c                                  |  2 +-
>  kernel/kexec_core.c                             | 91 ++++++++++++++++++++++++-
>  kernel/kexec_file.c                             |  2 +-
>  5 files changed, 106 insertions(+), 5 deletions(-)
> 
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index 42af9ca0127e..2b37d6a20747 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -2374,6 +2374,20 @@
>  			for Movable pages.  "nn[KMGTPE]", "nn%", and "mirror"
>  			are exclusive, so you cannot specify multiple forms.
>  
> +	kexec_core.load_limit_reboot=
> +	kexec_core.load_limit_panic=
> +			[KNL]
> +			This parameter specifies a limit to the number of times
> +			a kexec kernel can be loaded.
> +			Format: <int>
> +			-1  = Unlimited.
> +			int = Number of times kexec can be called.
> +
> +			During runtime, this parameter can be modified with a

> +			value smaller than the current one (but not -1).

Perhaps state:
			smaller positive value than the current one or if
			current is currently -1.

> +
> +			Default: -1
> +
>  	kgdbdbgp=	[KGDB,HW] kgdb over EHCI usb debug port.
>  			Format: <Controller#>[,poll interval]
>  			The controller # is the number of the ehci usb debug
> diff --git a/include/linux/kexec.h b/include/linux/kexec.h
> index 182e0c11b87b..5daf9990d5b8 100644
> --- a/include/linux/kexec.h
> +++ b/include/linux/kexec.h
> @@ -407,7 +407,7 @@ extern int kimage_crash_copy_vmcoreinfo(struct kimage *image);
>  extern struct kimage *kexec_image;
>  extern struct kimage *kexec_crash_image;
>  
> -bool kexec_load_permitted(void);
> +bool kexec_load_permitted(bool crash_image);
>  
>  #ifndef kexec_flush_icache_page
>  #define kexec_flush_icache_page(page)
> diff --git a/kernel/kexec.c b/kernel/kexec.c
> index ce1bca874a8d..7aefd134e319 100644
> --- a/kernel/kexec.c
> +++ b/kernel/kexec.c
> @@ -193,7 +193,7 @@ static inline int kexec_load_check(unsigned long nr_segments,
>  	int result;
>  
>  	/* We only trust the superuser with rebooting the system. */
> -	if (!kexec_load_permitted())
> +	if (!kexec_load_permitted(flags & KEXEC_ON_CRASH))

Note, here we have KEXEC_ON_CRASH (see bottom).

>  		return -EPERM;
>  
>  	/* Permit LSMs and IMA to fail the kexec */
> diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
> index a1efc70f4158..adf71f2be3ff 100644
> --- a/kernel/kexec_core.c
> +++ b/kernel/kexec_core.c
> @@ -952,13 +952,100 @@ static int __init kexec_core_sysctl_init(void)
>  late_initcall(kexec_core_sysctl_init);
>  #endif
>  
> -bool kexec_load_permitted(void)
> +struct kexec_load_limit {
> +	/* Mutex protects the limit count. */
> +	struct mutex mutex;
> +	int limit;
> +};
> +
> +struct kexec_load_limit load_limit_reboot = {

Perhaps make the above static?

> +	.mutex = __MUTEX_INITIALIZER(load_limit_reboot.mutex),
> +	.limit = -1,
> +};
> +
> +struct kexec_load_limit load_limit_panic = {

static?

> +	.mutex = __MUTEX_INITIALIZER(load_limit_panic.mutex),
> +	.limit = -1,
> +};
> +
> +static int param_get_limit(char *buffer, const struct kernel_param *kp)
>  {
> +	int ret;
> +	struct kexec_load_limit *limit = kp->arg;

Looks better if "int ret;" is after the "limit".

> +
> +	mutex_lock(&limit->mutex);
> +	ret = scnprintf(buffer, PAGE_SIZE, "%i\n", limit->limit);

The above string can be at most "-2147483648\n\0"

Which is 13 characters. Why use PAGE_SIZE. Or scnprintf(), and not just
state:

	/* buffer is PAGE_SIZE, much larger than what %i can be */
	ret = sprintf(buffer, "%i\n", limit->limit);

> +	mutex_unlock(&limit->mutex);
> +
> +	return ret;
> +}
> +
> +static int param_set_limit(const char *buffer, const struct kernel_param *kp)
> +{
> +	int ret;
> +	struct kexec_load_limit *limit = kp->arg;
> +	int new_val;
> +
> +	ret = kstrtoint(buffer, 0, &new_val);
> +	if (ret)
> +		return ret;
> +
> +	new_val = max(-1, new_val);

I wonder if anything less than -1 should be invalid.

> +
> +	mutex_lock(&limit->mutex);
> +
> +	if (new_val == -1 && limit->limit != -1) {

If -1 can't change the value, why allow it to be passed in to begin with.

Perhaps we should only allow sysctl to set positive values? Would make the
code simpler.

> +		ret = -EINVAL;
> +		goto done;
> +	}
> +
> +	if (limit->limit != -1 && new_val > limit->limit) {

Since the above documentation said "small than" perhaps ">="?

> +		ret = -EINVAL;
> +		goto done;
> +	}
> +
> +	limit->limit = new_val;
> +
> +done:
> +	mutex_unlock(&limit->mutex);
> +
> +	return ret;
> +}
> +
> +static const struct kernel_param_ops load_limit_ops = {
> +	.get = param_get_limit,
> +	.set = param_set_limit,
> +};
> +
> +module_param_cb(load_limit_reboot, &load_limit_ops, &load_limit_reboot, 0644);
> +MODULE_PARM_DESC(load_limit_reboot, "Maximum attempts to load a kexec reboot kernel");
> +
> +module_param_cb(load_limit_panic, &load_limit_ops, &load_limit_panic, 0644);
> +MODULE_PARM_DESC(load_limit_reboot, "Maximum attempts to load a kexec panic kernel");

Wait, why the module params if this can not be a module?

The kernel/kexec.c is decided via CONFIG_KEXEC_CORE which is bool. Either
builtin or not at all. No module selection possible.

For kernel parameters, we should just use __setup(), right?

> +
> +bool kexec_load_permitted(bool crash_image)
> +{
> +	struct kexec_load_limit *limit;
> +
>  	/*
>  	 * Only the superuser can use the kexec syscall and if it has not
>  	 * been disabled.
>  	 */
> -	return capable(CAP_SYS_BOOT) && !kexec_load_disabled;
> +	if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
> +		return false;
> +
> +	/* Check limit counter and decrease it.*/
> +	limit = crash_image ? &load_limit_panic : &load_limit_reboot;
> +	mutex_lock(&limit->mutex);
> +	if (!limit->limit) {
> +		mutex_unlock(&limit->mutex);
> +		return false;
> +	}
> +	if (limit->limit != -1)
> +		limit->limit--;
> +	mutex_unlock(&limit->mutex);
> +
> +	return true;
>  }
>  
>  /*
> diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
> index 29efa43ea951..6a1d4b07635e 100644
> --- a/kernel/kexec_file.c
> +++ b/kernel/kexec_file.c
> @@ -330,7 +330,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
>  	struct kimage **dest_image, *image;
>  
>  	/* We only trust the superuser with rebooting the system. */
> -	if (!kexec_load_permitted())
> +	if (!kexec_load_permitted(flags & KEXEC_FILE_FLAGS))

Here we have KEXEC_FILE_FLAGS, where above it was KEXCE_FILE_CRASH.

This is confusing to what denotes the "crash_image" boolean. Can we just
pass in flags and figure it out in the kexec_load_permitted() function?

-- Steve


>  		return -EPERM;
>  
>  	/* Make sure we have a legal set of flags */
>
  
Ricardo Ribalda Dec. 21, 2022, 1:22 a.m. UTC | #2
Hi Steven

Thanks for your review!!! Will send a new version.
After giving it a thought... you are right :). setting the current
value should return -EINVAL. We should only return OK if we actually
do something.

On Wed, 21 Dec 2022 at 01:22, Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Tue, 20 Dec 2022 23:05:45 +0100
> Ricardo Ribalda <ribalda@chromium.org> wrote:
>
> I hate to be the grammar police, but..
>
> > Add two parameter to specify how many times a kexec kernel can be loaded.
>
>    "parameters"
>
> >
> > The sysadmin can set different limits for kexec panic and kexec reboot
> > kernels.
> >
> > The value can be modified at runtime via sysfs, but only with a value
> > smaller than the current one (except -1).
> >
> > Signed-off-by: Ricardo Ribalda <ribalda@chromium.org>
> > ---
> >  Documentation/admin-guide/kernel-parameters.txt | 14 ++++
> >  include/linux/kexec.h                           |  2 +-
> >  kernel/kexec.c                                  |  2 +-
> >  kernel/kexec_core.c                             | 91 ++++++++++++++++++++++++-
> >  kernel/kexec_file.c                             |  2 +-
> >  5 files changed, 106 insertions(+), 5 deletions(-)
> >
> > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> > index 42af9ca0127e..2b37d6a20747 100644
> > --- a/Documentation/admin-guide/kernel-parameters.txt
> > +++ b/Documentation/admin-guide/kernel-parameters.txt
> > @@ -2374,6 +2374,20 @@
> >                       for Movable pages.  "nn[KMGTPE]", "nn%", and "mirror"
> >                       are exclusive, so you cannot specify multiple forms.
> >
> > +     kexec_core.load_limit_reboot=
> > +     kexec_core.load_limit_panic=
> > +                     [KNL]
> > +                     This parameter specifies a limit to the number of times
> > +                     a kexec kernel can be loaded.
> > +                     Format: <int>
> > +                     -1  = Unlimited.
> > +                     int = Number of times kexec can be called.
> > +
> > +                     During runtime, this parameter can be modified with a
>
> > +                     value smaller than the current one (but not -1).
>
> Perhaps state:
>                         smaller positive value than the current one or if
>                         current is currently -1.

I find it a bit complicated..
What about:

 During runtime this parameter can be modified with a more restrictive value




>
> > +
> > +                     Default: -1
> > +
> >       kgdbdbgp=       [KGDB,HW] kgdb over EHCI usb debug port.
> >                       Format: <Controller#>[,poll interval]
> >                       The controller # is the number of the ehci usb debug
> > diff --git a/include/linux/kexec.h b/include/linux/kexec.h
> > index 182e0c11b87b..5daf9990d5b8 100644
> > --- a/include/linux/kexec.h
> > +++ b/include/linux/kexec.h
> > @@ -407,7 +407,7 @@ extern int kimage_crash_copy_vmcoreinfo(struct kimage *image);
> >  extern struct kimage *kexec_image;
> >  extern struct kimage *kexec_crash_image;
> >
> > -bool kexec_load_permitted(void);
> > +bool kexec_load_permitted(bool crash_image);
> >
> >  #ifndef kexec_flush_icache_page
> >  #define kexec_flush_icache_page(page)
> > diff --git a/kernel/kexec.c b/kernel/kexec.c
> > index ce1bca874a8d..7aefd134e319 100644
> > --- a/kernel/kexec.c
> > +++ b/kernel/kexec.c
> > @@ -193,7 +193,7 @@ static inline int kexec_load_check(unsigned long nr_segments,
> >       int result;
> >
> >       /* We only trust the superuser with rebooting the system. */
> > -     if (!kexec_load_permitted())
> > +     if (!kexec_load_permitted(flags & KEXEC_ON_CRASH))
>
> Note, here we have KEXEC_ON_CRASH (see bottom).
>
> >               return -EPERM;
> >
> >       /* Permit LSMs and IMA to fail the kexec */
> > diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
> > index a1efc70f4158..adf71f2be3ff 100644
> > --- a/kernel/kexec_core.c
> > +++ b/kernel/kexec_core.c
> > @@ -952,13 +952,100 @@ static int __init kexec_core_sysctl_init(void)
> >  late_initcall(kexec_core_sysctl_init);
> >  #endif
> >
> > -bool kexec_load_permitted(void)
> > +struct kexec_load_limit {
> > +     /* Mutex protects the limit count. */
> > +     struct mutex mutex;
> > +     int limit;
> > +};
> > +
> > +struct kexec_load_limit load_limit_reboot = {
>
> Perhaps make the above static?
>
> > +     .mutex = __MUTEX_INITIALIZER(load_limit_reboot.mutex),
> > +     .limit = -1,
> > +};
> > +
> > +struct kexec_load_limit load_limit_panic = {
>
> static?
>
> > +     .mutex = __MUTEX_INITIALIZER(load_limit_panic.mutex),
> > +     .limit = -1,
> > +};
> > +
> > +static int param_get_limit(char *buffer, const struct kernel_param *kp)
> >  {
> > +     int ret;
> > +     struct kexec_load_limit *limit = kp->arg;
>
> Looks better if "int ret;" is after the "limit".
>
> > +
> > +     mutex_lock(&limit->mutex);
> > +     ret = scnprintf(buffer, PAGE_SIZE, "%i\n", limit->limit);
>
> The above string can be at most "-2147483648\n\0"
>
> Which is 13 characters. Why use PAGE_SIZE. Or scnprintf(), and not just
> state:
>
like it!
>         /* buffer is PAGE_SIZE, much larger than what %i can be */
>         ret = sprintf(buffer, "%i\n", limit->limit);
>
> > +     mutex_unlock(&limit->mutex);
> > +
> > +     return ret;
> > +}
> > +
> > +static int param_set_limit(const char *buffer, const struct kernel_param *kp)
> > +{
> > +     int ret;
> > +     struct kexec_load_limit *limit = kp->arg;
> > +     int new_val;
> > +
> > +     ret = kstrtoint(buffer, 0, &new_val);
> > +     if (ret)
> > +             return ret;
> > +
> > +     new_val = max(-1, new_val);
>
> I wonder if anything less than -1 should be invalid.
>
> > +
> > +     mutex_lock(&limit->mutex);
> > +
> > +     if (new_val == -1 && limit->limit != -1) {
>
> If -1 can't change the value, why allow it to be passed in to begin with.
>
> Perhaps we should only allow sysctl to set positive values? Would make the
> code simpler.
>
> > +             ret = -EINVAL;
> > +             goto done;
> > +     }
> > +
> > +     if (limit->limit != -1 && new_val > limit->limit) {
>
> Since the above documentation said "small than" perhaps ">="?
>
> > +             ret = -EINVAL;
> > +             goto done;
> > +     }
> > +
> > +     limit->limit = new_val;
> > +
> > +done:
> > +     mutex_unlock(&limit->mutex);
> > +
> > +     return ret;
> > +}
> > +
> > +static const struct kernel_param_ops load_limit_ops = {
> > +     .get = param_get_limit,
> > +     .set = param_set_limit,
> > +};
> > +
> > +module_param_cb(load_limit_reboot, &load_limit_ops, &load_limit_reboot, 0644);
> > +MODULE_PARM_DESC(load_limit_reboot, "Maximum attempts to load a kexec reboot kernel");
> > +
> > +module_param_cb(load_limit_panic, &load_limit_ops, &load_limit_panic, 0644);
> > +MODULE_PARM_DESC(load_limit_reboot, "Maximum attempts to load a kexec panic kernel");
>
> Wait, why the module params if this can not be a module?
>
> The kernel/kexec.c is decided via CONFIG_KEXEC_CORE which is bool. Either
> builtin or not at all. No module selection possible.
>
> For kernel parameters, we should just use __setup(), right?

Isn't __setup() only kernel parameter and then it cannot be updated on runtime?

What about using late_param_cb? and remove MODULE_PARAM_DESC ?

I think this is how these parameters work

$ ls /sys/module/kernel/parameters/
consoleblank  crash_kexec_post_notifiers  ignore_rlimit_data
initcall_debug  module_blacklist  panic  panic_on_warn  panic_print
pause_on_oops


>
> > +
> > +bool kexec_load_permitted(bool crash_image)
> > +{
> > +     struct kexec_load_limit *limit;
> > +
> >       /*
> >        * Only the superuser can use the kexec syscall and if it has not
> >        * been disabled.
> >        */
> > -     return capable(CAP_SYS_BOOT) && !kexec_load_disabled;
> > +     if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
> > +             return false;
> > +
> > +     /* Check limit counter and decrease it.*/
> > +     limit = crash_image ? &load_limit_panic : &load_limit_reboot;
> > +     mutex_lock(&limit->mutex);
> > +     if (!limit->limit) {
> > +             mutex_unlock(&limit->mutex);
> > +             return false;
> > +     }
> > +     if (limit->limit != -1)
> > +             limit->limit--;
> > +     mutex_unlock(&limit->mutex);
> > +
> > +     return true;
> >  }
> >
> >  /*
> > diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
> > index 29efa43ea951..6a1d4b07635e 100644
> > --- a/kernel/kexec_file.c
> > +++ b/kernel/kexec_file.c
> > @@ -330,7 +330,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
> >       struct kimage **dest_image, *image;
> >
> >       /* We only trust the superuser with rebooting the system. */
> > -     if (!kexec_load_permitted())
> > +     if (!kexec_load_permitted(flags & KEXEC_FILE_FLAGS))
>
> Here we have KEXEC_FILE_FLAGS, where above it was KEXCE_FILE_CRASH.
>
> This is confusing to what denotes the "crash_image" boolean. Can we just
> pass in flags and figure it out in the kexec_load_permitted() function?

This is a typo and a bad one!, thanks for catching up!.

It should be KEXEC_ON_CRASH and KEXEC_FILE_ON_CRASH, Of course both
have different values

I could pass the flags and then check for flags & (KEXEC_ON_CRASH |
KEXEC_FILE_ON_CRASH)... but not sure if it is better
>
> -- Steve
>
>
> >               return -EPERM;
> >
> >       /* Make sure we have a legal set of flags */
> >
>
  
Steven Rostedt Dec. 21, 2022, 2:52 a.m. UTC | #3
On Wed, 21 Dec 2022 02:22:57 +0100
Ricardo Ribalda <ribalda@chromium.org> wrote:

> > > +     kexec_core.load_limit_reboot=
> > > +     kexec_core.load_limit_panic=
> > > +                     [KNL]
> > > +                     This parameter specifies a limit to the number of times
> > > +                     a kexec kernel can be loaded.
> > > +                     Format: <int>
> > > +                     -1  = Unlimited.
> > > +                     int = Number of times kexec can be called.
> > > +
> > > +                     During runtime, this parameter can be modified with a  
> >  
> > > +                     value smaller than the current one (but not -1).  
> >
> > Perhaps state:
> >                         smaller positive value than the current one or if
> >                         current is currently -1.  
> 
> I find it a bit complicated..
> What about:
> 
>  During runtime this parameter can be modified with a more restrictive value

Sure. That's better than the original.


> > > +module_param_cb(load_limit_reboot, &load_limit_ops, &load_limit_reboot, 0644);
> > > +MODULE_PARM_DESC(load_limit_reboot, "Maximum attempts to load a kexec reboot kernel");
> > > +
> > > +module_param_cb(load_limit_panic, &load_limit_ops, &load_limit_panic, 0644);
> > > +MODULE_PARM_DESC(load_limit_reboot, "Maximum attempts to load a kexec panic kernel");  
> >
> > Wait, why the module params if this can not be a module?
> >
> > The kernel/kexec.c is decided via CONFIG_KEXEC_CORE which is bool. Either
> > builtin or not at all. No module selection possible.
> >
> > For kernel parameters, we should just use __setup(), right?  
> 
> Isn't __setup() only kernel parameter and then it cannot be updated on runtime?

Yes, but then we use sysctl.

> 
> What about using late_param_cb? and remove MODULE_PARAM_DESC ?
> 
> I think this is how these parameters work
> 
> $ ls /sys/module/kernel/parameters/
> consoleblank  crash_kexec_post_notifiers  ignore_rlimit_data
> initcall_debug  module_blacklist  panic  panic_on_warn  panic_print
> pause_on_oops

Well, I think those are more leftovers and not something we want to add to.

I believe sysctl is the better option, and a more common one:

$ ls /proc/sys/kernel/
acct                           perf_event_max_contexts_per_stack
acpi_video_flags               perf_event_max_sample_rate
auto_msgmni                    perf_event_max_stack
bootloader_type                perf_event_mlock_kb
bootloader_version             perf_event_paranoid
bpf_stats_enabled              pid_max
cad_pid                        poweroff_cmd
cap_last_cap                   print-fatal-signals
core_pattern                   printk
core_pipe_limit                printk_delay
core_uses_pid                  printk_devkmsg
ctrl-alt-del                   printk_ratelimit
dmesg_restrict                 printk_ratelimit_burst
domainname                     pty
firmware_config                random
ftrace_dump_on_oops            randomize_va_space
ftrace_enabled                 real-root-dev
hardlockup_all_cpu_backtrace   sched_autogroup_enabled
hardlockup_panic               sched_child_runs_first
hostname                       sched_deadline_period_max_us
hotplug                        sched_deadline_period_min_us
hung_task_all_cpu_backtrace    sched_energy_aware
hung_task_check_count          sched_rr_timeslice_ms
hung_task_check_interval_secs  sched_rt_period_us
hung_task_panic                sched_rt_runtime_us
hung_task_timeout_secs         sched_util_clamp_max
hung_task_warnings             sched_util_clamp_min
io_delay_type                  sched_util_clamp_min_rt_default
kexec_load_disabled            seccomp
keys                           sem
kptr_restrict                  sem_next_id
max_lock_depth                 shmall
max_rcu_stall_to_panic         shmmax
modprobe                       shmmni
modules_disabled               shm_next_id
msgmax                         shm_rmid_forced
msgmnb                         softlockup_all_cpu_backtrace
msgmni                         softlockup_panic
msg_next_id                    soft_watchdog
ngroups_max                    stack_tracer_enabled
nmi_watchdog                   sysctl_writes_strict
ns_last_pid                    sysrq
numa_balancing                 tainted
oops_all_cpu_backtrace         task_delayacct
osrelease                      threads-max
ostype                         timer_migration
overflowgid                    traceoff_on_warning
overflowuid                    tracepoint_printk
panic                          unknown_nmi_panic
panic_on_io_nmi                unprivileged_bpf_disabled
panic_on_oops                  usermodehelper
panic_on_rcu_stall             version
panic_on_unrecovered_nmi       watchdog
panic_on_warn                  watchdog_cpumask
panic_print                    watchdog_thresh
perf_cpu_time_max_percent      yama



> I could pass the flags and then check for flags & (KEXEC_ON_CRASH |
> KEXEC_FILE_ON_CRASH)... but not sure if it is better

It keeps the processing in a single place, which helps avoid bugs like this.

-- Steve
  

Patch

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 42af9ca0127e..2b37d6a20747 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2374,6 +2374,20 @@ 
 			for Movable pages.  "nn[KMGTPE]", "nn%", and "mirror"
 			are exclusive, so you cannot specify multiple forms.
 
+	kexec_core.load_limit_reboot=
+	kexec_core.load_limit_panic=
+			[KNL]
+			This parameter specifies a limit to the number of times
+			a kexec kernel can be loaded.
+			Format: <int>
+			-1  = Unlimited.
+			int = Number of times kexec can be called.
+
+			During runtime, this parameter can be modified with a
+			value smaller than the current one (but not -1).
+
+			Default: -1
+
 	kgdbdbgp=	[KGDB,HW] kgdb over EHCI usb debug port.
 			Format: <Controller#>[,poll interval]
 			The controller # is the number of the ehci usb debug
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 182e0c11b87b..5daf9990d5b8 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -407,7 +407,7 @@  extern int kimage_crash_copy_vmcoreinfo(struct kimage *image);
 extern struct kimage *kexec_image;
 extern struct kimage *kexec_crash_image;
 
-bool kexec_load_permitted(void);
+bool kexec_load_permitted(bool crash_image);
 
 #ifndef kexec_flush_icache_page
 #define kexec_flush_icache_page(page)
diff --git a/kernel/kexec.c b/kernel/kexec.c
index ce1bca874a8d..7aefd134e319 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -193,7 +193,7 @@  static inline int kexec_load_check(unsigned long nr_segments,
 	int result;
 
 	/* We only trust the superuser with rebooting the system. */
-	if (!kexec_load_permitted())
+	if (!kexec_load_permitted(flags & KEXEC_ON_CRASH))
 		return -EPERM;
 
 	/* Permit LSMs and IMA to fail the kexec */
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index a1efc70f4158..adf71f2be3ff 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -952,13 +952,100 @@  static int __init kexec_core_sysctl_init(void)
 late_initcall(kexec_core_sysctl_init);
 #endif
 
-bool kexec_load_permitted(void)
+struct kexec_load_limit {
+	/* Mutex protects the limit count. */
+	struct mutex mutex;
+	int limit;
+};
+
+struct kexec_load_limit load_limit_reboot = {
+	.mutex = __MUTEX_INITIALIZER(load_limit_reboot.mutex),
+	.limit = -1,
+};
+
+struct kexec_load_limit load_limit_panic = {
+	.mutex = __MUTEX_INITIALIZER(load_limit_panic.mutex),
+	.limit = -1,
+};
+
+static int param_get_limit(char *buffer, const struct kernel_param *kp)
 {
+	int ret;
+	struct kexec_load_limit *limit = kp->arg;
+
+	mutex_lock(&limit->mutex);
+	ret = scnprintf(buffer, PAGE_SIZE, "%i\n", limit->limit);
+	mutex_unlock(&limit->mutex);
+
+	return ret;
+}
+
+static int param_set_limit(const char *buffer, const struct kernel_param *kp)
+{
+	int ret;
+	struct kexec_load_limit *limit = kp->arg;
+	int new_val;
+
+	ret = kstrtoint(buffer, 0, &new_val);
+	if (ret)
+		return ret;
+
+	new_val = max(-1, new_val);
+
+	mutex_lock(&limit->mutex);
+
+	if (new_val == -1 && limit->limit != -1) {
+		ret = -EINVAL;
+		goto done;
+	}
+
+	if (limit->limit != -1 && new_val > limit->limit) {
+		ret = -EINVAL;
+		goto done;
+	}
+
+	limit->limit = new_val;
+
+done:
+	mutex_unlock(&limit->mutex);
+
+	return ret;
+}
+
+static const struct kernel_param_ops load_limit_ops = {
+	.get = param_get_limit,
+	.set = param_set_limit,
+};
+
+module_param_cb(load_limit_reboot, &load_limit_ops, &load_limit_reboot, 0644);
+MODULE_PARM_DESC(load_limit_reboot, "Maximum attempts to load a kexec reboot kernel");
+
+module_param_cb(load_limit_panic, &load_limit_ops, &load_limit_panic, 0644);
+MODULE_PARM_DESC(load_limit_reboot, "Maximum attempts to load a kexec panic kernel");
+
+bool kexec_load_permitted(bool crash_image)
+{
+	struct kexec_load_limit *limit;
+
 	/*
 	 * Only the superuser can use the kexec syscall and if it has not
 	 * been disabled.
 	 */
-	return capable(CAP_SYS_BOOT) && !kexec_load_disabled;
+	if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
+		return false;
+
+	/* Check limit counter and decrease it.*/
+	limit = crash_image ? &load_limit_panic : &load_limit_reboot;
+	mutex_lock(&limit->mutex);
+	if (!limit->limit) {
+		mutex_unlock(&limit->mutex);
+		return false;
+	}
+	if (limit->limit != -1)
+		limit->limit--;
+	mutex_unlock(&limit->mutex);
+
+	return true;
 }
 
 /*
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 29efa43ea951..6a1d4b07635e 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -330,7 +330,7 @@  SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
 	struct kimage **dest_image, *image;
 
 	/* We only trust the superuser with rebooting the system. */
-	if (!kexec_load_permitted())
+	if (!kexec_load_permitted(flags & KEXEC_FILE_FLAGS))
 		return -EPERM;
 
 	/* Make sure we have a legal set of flags */