[RFC,07/86] Revert "livepatch,sched: Add livepatch task switching to cond_resched()"

Message ID 20231107215742.363031-8-ankur.a.arora@oracle.com
State New
Series Make the kernel preemptible

Commit Message

Ankur Arora Nov. 7, 2023, 9:56 p.m. UTC
  This reverts commit e3ff7c609f39671d1aaff4fb4a8594e14f3e03f8.

Note that removing this commit reintroduces "live patches failing to
complete within a reasonable amount of time due to CPU-bound kthreads."

Unfortunately this fix depends quite critically on PREEMPT_DYNAMIC and
existence of cond_resched() so this will need an alternate fix.

Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 include/linux/livepatch.h       |   1 -
 include/linux/livepatch_sched.h |  29 ---------
 include/linux/sched.h           |  20 ++----
 kernel/livepatch/core.c         |   1 -
 kernel/livepatch/transition.c   | 107 +++++---------------------------
 kernel/sched/core.c             |  64 +++----------------
 6 files changed, 28 insertions(+), 194 deletions(-)
 delete mode 100644 include/linux/livepatch_sched.h
  

Comments

Steven Rostedt Nov. 7, 2023, 11:16 p.m. UTC | #1
On Tue,  7 Nov 2023 13:56:53 -0800
Ankur Arora <ankur.a.arora@oracle.com> wrote:

> This reverts commit e3ff7c609f39671d1aaff4fb4a8594e14f3e03f8.
> 
> Note that removing this commit reintroduces "live patches failing to
> complete within a reasonable amount of time due to CPU-bound kthreads."
> 
> Unfortunately this fix depends quite critically on PREEMPT_DYNAMIC and
> existence of cond_resched() so this will need an alternate fix.
> 

Then it would probably be a good idea to Cc the live patching maintainers!

-- Steve

  
Ankur Arora Nov. 8, 2023, 4:55 a.m. UTC | #2
Steven Rostedt <rostedt@goodmis.org> writes:

> On Tue,  7 Nov 2023 13:56:53 -0800
> Ankur Arora <ankur.a.arora@oracle.com> wrote:
>
>> This reverts commit e3ff7c609f39671d1aaff4fb4a8594e14f3e03f8.
>>
>> Note that removing this commit reintroduces "live patches failing to
>> complete within a reasonable amount of time due to CPU-bound kthreads."
>>
>> Unfortunately this fix depends quite critically on PREEMPT_DYNAMIC and
>> existence of cond_resched() so this will need an alternate fix.
>>
>
> Then it would probably be a good idea to Cc the live patching maintainers!

Indeed. Could have sworn that I had. But clearly not.

Apologies and thanks for adding them.


--
ankur
  
Josh Poimboeuf Nov. 9, 2023, 5:26 p.m. UTC | #3
On Tue, Nov 07, 2023 at 06:16:09PM -0500, Steven Rostedt wrote:
> On Tue,  7 Nov 2023 13:56:53 -0800
> Ankur Arora <ankur.a.arora@oracle.com> wrote:
> 
> > This reverts commit e3ff7c609f39671d1aaff4fb4a8594e14f3e03f8.
> > 
> > Note that removing this commit reintroduces "live patches failing to
> > complete within a reasonable amount of time due to CPU-bound kthreads."
> > 
> > Unfortunately this fix depends quite critically on PREEMPT_DYNAMIC and
> > existence of cond_resched() so this will need an alternate fix.

We definitely don't want to introduce a regression, something will need
to be figured out before removing cond_resched().

We could hook into preempt_schedule_irq(), but that wouldn't work for
non-ORC.

Another option would be to hook into schedule().  Then livepatch could
set TIF_NEED_RESCHED on remaining unpatched tasks.  But again if they go
through the preemption path then we have the same problem for non-ORC.

Worst case we'll need to sprinkle cond_livepatch() everywhere :-/
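
A rough sketch of that second option, purely for illustration
(klp_nudge_unpatched_tasks() is a made-up name; the task walk just
mirrors what klp_start_transition() already does under tasklist_lock):

#include <linux/livepatch.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>	/* for_each_process_thread() */
#include <linux/sched/task.h>	/* tasklist_lock */

/* Mark every still-unpatched task so it heads into schedule() soon. */
static void klp_nudge_unpatched_tasks(void)
{
	struct task_struct *g, *task;

	read_lock(&tasklist_lock);
	for_each_process_thread(g, task) {
		if (klp_patch_pending(task))
			set_tsk_need_resched(task);
	}
	read_unlock(&tasklist_lock);
}

On its own that only sets the flag, though: a task running on another
CPU also needs a resched IPI, and if it only ever schedules from the
preemption path we're back to the unwinder problem.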
  
Steven Rostedt Nov. 9, 2023, 5:31 p.m. UTC | #4
On Thu, 9 Nov 2023 09:26:37 -0800
Josh Poimboeuf <jpoimboe@kernel.org> wrote:

> On Tue, Nov 07, 2023 at 06:16:09PM -0500, Steven Rostedt wrote:
> > On Tue,  7 Nov 2023 13:56:53 -0800
> > Ankur Arora <ankur.a.arora@oracle.com> wrote:
> >   
> > > This reverts commit e3ff7c609f39671d1aaff4fb4a8594e14f3e03f8.
> > > 
> > > Note that removing this commit reintroduces "live patches failing to
> > > complete within a reasonable amount of time due to CPU-bound kthreads."
> > > 
> > > Unfortunately this fix depends quite critically on PREEMPT_DYNAMIC and
> > > existence of cond_resched() so this will need an alternate fix.  
> 
> We definitely don't want to introduce a regression, something will need
> to be figured out before removing cond_resched().
> 
> We could hook into preempt_schedule_irq(), but that wouldn't work for
> non-ORC.
> 
> Another option would be to hook into schedule().  Then livepatch could
> set TIF_NEED_RESCHED on remaining unpatched tasks.  But again if they go
> through the preemption path then we have the same problem for non-ORC.
> 
> Worst case we'll need to sprinkle cond_livepatch() everywhere :-/
> 

I guess I'm not fully understanding what the cond rescheds are for. But
would an IPI to all CPUs setting NEED_RESCHED, fix it?

-- Steve
  
Josh Poimboeuf Nov. 9, 2023, 5:51 p.m. UTC | #5
On Thu, Nov 09, 2023 at 12:31:47PM -0500, Steven Rostedt wrote:
> On Thu, 9 Nov 2023 09:26:37 -0800
> Josh Poimboeuf <jpoimboe@kernel.org> wrote:
> 
> > On Tue, Nov 07, 2023 at 06:16:09PM -0500, Steven Rostedt wrote:
> > > On Tue,  7 Nov 2023 13:56:53 -0800
> > > Ankur Arora <ankur.a.arora@oracle.com> wrote:
> > >   
> > > > This reverts commit e3ff7c609f39671d1aaff4fb4a8594e14f3e03f8.
> > > > 
> > > > Note that removing this commit reintroduces "live patches failing to
> > > > complete within a reasonable amount of time due to CPU-bound kthreads."
> > > > 
> > > > Unfortunately this fix depends quite critically on PREEMPT_DYNAMIC and
> > > > existence of cond_resched() so this will need an alternate fix.  
> > 
> > We definitely don't want to introduce a regression, something will need
> > to be figured out before removing cond_resched().
> > 
> > We could hook into preempt_schedule_irq(), but that wouldn't work for
> > non-ORC.
> > 
> > Another option would be to hook into schedule().  Then livepatch could
> > set TIF_NEED_RESCHED on remaining unpatched tasks.  But again if they go
> > through the preemption path then we have the same problem for non-ORC.
> > 
> > Worst case we'll need to sprinkle cond_livepatch() everywhere :-/
> > 
> 
> I guess I'm not fully understanding what the cond rescheds are for. But
> would an IPI to all CPUs setting NEED_RESCHED, fix it?

If all livepatch arches had the ORC unwinder, yes.

The problem is that frame pointer (and similar) unwinders can't reliably
unwind past an interrupt frame.
  
Ankur Arora Nov. 9, 2023, 10:50 p.m. UTC | #6
Josh Poimboeuf <jpoimboe@kernel.org> writes:

> On Thu, Nov 09, 2023 at 12:31:47PM -0500, Steven Rostedt wrote:
>> On Thu, 9 Nov 2023 09:26:37 -0800
>> Josh Poimboeuf <jpoimboe@kernel.org> wrote:
>>
>> > On Tue, Nov 07, 2023 at 06:16:09PM -0500, Steven Rostedt wrote:
>> > > On Tue,  7 Nov 2023 13:56:53 -0800
>> > > Ankur Arora <ankur.a.arora@oracle.com> wrote:
>> > >
>> > > > This reverts commit e3ff7c609f39671d1aaff4fb4a8594e14f3e03f8.
>> > > >
>> > > > Note that removing this commit reintroduces "live patches failing to
>> > > > complete within a reasonable amount of time due to CPU-bound kthreads."
>> > > >
>> > > > Unfortunately this fix depends quite critically on PREEMPT_DYNAMIC and
>> > > > existence of cond_resched() so this will need an alternate fix.
>> >
>> > We definitely don't want to introduce a regression, something will need
>> > to be figured out before removing cond_resched().
>> >
>> > We could hook into preempt_schedule_irq(), but that wouldn't work for
>> > non-ORC.
>> >
>> > Another option would be to hook into schedule().  Then livepatch could
>> > set TIF_NEED_RESCHED on remaining unpatched tasks.  But again if they go
>> > through the preemption path then we have the same problem for non-ORC.
>> >
>> > Worst case we'll need to sprinkle cond_livepatch() everywhere :-/
>> >
>>
>> I guess I'm not fully understanding what the cond rescheds are for. But
>> would an IPI to all CPUs setting NEED_RESCHED, fix it?

Yeah. We could just temporarily toggle to full preemption, where
NEED_RESCHED_LAZY is always upgraded to NEED_RESCHED, which will
then send IPIs.

> If all livepatch arches had the ORC unwinder, yes.
>
> The problem is that frame pointer (and similar) unwinders can't reliably
> unwind past an interrupt frame.

Ah, I wonder if we could just disable the preempt_schedule_irq() path
temporarily? Hooking into schedule() alongside something like this:

@@ -379,7 +379,7 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)

 void irqentry_exit_cond_resched(void)
 {
-       if (!preempt_count()) {
+       if (klp_cond_resched_disable() && !preempt_count()) {

The problem would be tasks that don't go through any preemptible
sections.

--
ankur
  
Josh Poimboeuf Nov. 9, 2023, 11:47 p.m. UTC | #7
On Thu, Nov 09, 2023 at 02:50:48PM -0800, Ankur Arora wrote:
> >> I guess I'm not fully understanding what the cond rescheds are for. But
> >> would an IPI to all CPUs setting NEED_RESCHED, fix it?
> 
> Yeah. We could just temporarily toggle to full preemption, where
> NEED_RESCHED_LAZY is always upgraded to NEED_RESCHED, which will
> then send IPIs.
> 
> > If all livepatch arches had the ORC unwinder, yes.
> >
> > The problem is that frame pointer (and similar) unwinders can't reliably
> > unwind past an interrupt frame.
> 
> Ah, I wonder if we could just disable the preempt_schedule_irq() path
> temporarily? Hooking into schedule() alongside something like this:
> 
> @@ -379,7 +379,7 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
> 
>  void irqentry_exit_cond_resched(void)
>  {
> -       if (!preempt_count()) {
> +       if (klp_cond_resched_disable() && !preempt_count()) {
> 
> The problem would be tasks that don't go through any preemptible
> sections.

Let me back up a bit and explain what klp is trying to do.

When a livepatch is applied, klp needs to unwind all the tasks,
preferably within a reasonable amount of time.

We can't unwind task A from task B while task A is running, since task A
could be changing the stack during the unwind.  So task A needs to be
blocked or asleep.  The only exception to that is if the unwind happens
in the context of task A itself.

The problem we were seeing was CPU-bound kthreads (e.g., vhost_worker)
not getting patched within a reasonable amount of time.  We fixed it by
hooking the klp unwind into cond_resched() so it can unwind from the
task itself.

It only worked because we had a non-preempted hook (because non-ORC
unwinders can't unwind reliably through preemption) which called klp to
unwind from the context of the task.

Without something to hook into, we have a problem.  We could of course
hook into schedule(), but if the kthread never calls schedule() from a
non-preempted context then it still doesn't help.
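
Condensed from the code this patch reverts (see the full diff at the
bottom of the page), the cond_resched() hook was essentially:

static int klp_cond_resched(void)
{
	__klp_sched_try_switch();	/* try to transition 'current' */
	return __cond_resched();
}

void __klp_sched_try_switch(void)
{
	if (likely(!klp_patch_pending(current)))
		return;

	/*
	 * cond_resched() callers can hold all sorts of locks, so the
	 * klp_mutex is out; disabling preemption is enough because
	 * task_call_func() can't switch a task while it's running.
	 */
	preempt_disable();
	if (klp_patch_pending(current)) {
		smp_rmb();	/* pairs with klp_{init,reverse}_transition() */
		klp_try_switch_task(current);	/* unwind our own stack */
	}
	preempt_enable();
}

On PREEMPT_DYNAMIC kernels klp_cond_resched() was installed for the
duration of the transition with static_call_update(cond_resched, ...);
otherwise a static key in the inline wrapper did the gating.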
  
Ankur Arora Nov. 10, 2023, 12:46 a.m. UTC | #8
Josh Poimboeuf <jpoimboe@kernel.org> writes:

> On Thu, Nov 09, 2023 at 02:50:48PM -0800, Ankur Arora wrote:
>> >> I guess I'm not fully understanding what the cond rescheds are for. But
>> >> would an IPI to all CPUs setting NEED_RESCHED, fix it?
>>
>> Yeah. We could just temporarily toggle to full preemption, when
>> NEED_RESCHED_LAZY is always upgraded to NEED_RESCHED which will
>> then send IPIs.
>>
>> > If all livepatch arches had the ORC unwinder, yes.
>> >
>> > The problem is that frame pointer (and similar) unwinders can't reliably
>> > unwind past an interrupt frame.
>>
>> Ah, I wonder if we could just disable the preempt_schedule_irq() path
>> temporarily? Hooking into schedule() alongside something like this:
>>
>> @@ -379,7 +379,7 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
>>
>>  void irqentry_exit_cond_resched(void)
>>  {
>> -       if (!preempt_count()) {
>> +       if (klp_cond_resched_disable() && !preempt_count()) {
>>
>> The problem would be tasks that don't go through any preemptible
>> sections.
>
> Let me back up a bit and explain what klp is trying to do.
>
> When a livepatch is applied, klp needs to unwind all the tasks,
> preferably within a reasonable amount of time.
>
> We can't unwind task A from task B while task A is running, since task A
> could be changing the stack during the unwind.  So task A needs to be
> blocked or asleep.  The only exception to that is if the unwind happens
> in the context of task A itself.

> The problem we were seeing was CPU-bound kthreads (e.g., vhost_worker)
> not getting patched within a reasonable amount of time.  We fixed it by
> hooking the klp unwind into cond_resched() so it can unwind from the
> task itself.

Right, so the task calls schedule() itself via cond_resched() and that
works. If the task schedules out by calling preempt_enable() that
presumably works as well.

So, that leaves two paths where we can't unwind:

 1. a task never entering or leaving preemptible sections
 2. or, a task which gets preempted in irqentry_exit_cond_resched()
   This we could disable temporarily.

> It only worked because we had a non-preempted hook (because non-ORC
> unwinders can't unwind reliably through preemption) which called klp to
> unwind from the context of the task.
>
> Without something to hook into, we have a problem.  We could of course
> hook into schedule(), but if the kthread never calls schedule() from a
> non-preempted context then it still doesn't help.

Yeah, agreed. The first one is a problem, and it's a problem with the
removal of cond_resched() generally, because the way to fix case 1 was
typically to add a cond_resched() when softlockups were seen or in
code review.
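
For completeness, a sketch of what hooking into schedule() could look
like; the two klp lines are the addition, the rest is roughly mainline's
schedule(), and as noted it does nothing for case 1 (illustrative only,
not something this series adds):

asmlinkage __visible void __sched schedule(void)
{
	struct task_struct *tsk = current;

	/*
	 * Voluntary entry point only: the preemption paths go through
	 * preempt_schedule{,_notrace,_irq}() straight to __schedule(),
	 * so this usually avoids the interrupt-frame problem for
	 * frame-pointer unwinders; klp_try_switch_task() still checks
	 * stack reliability before actually switching.
	 */
	if (unlikely(klp_patch_pending(tsk)))
		__klp_sched_try_switch();

	sched_submit_work(tsk);
	do {
		preempt_disable();
		__schedule(SM_NONE);
		sched_preempt_enable_no_resched();
	} while (need_resched());
	sched_update_worker(tsk);
}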

--
ankur
  
Steven Rostedt Nov. 10, 2023, 12:56 a.m. UTC | #9
On Thu, 9 Nov 2023 09:51:18 -0800
Josh Poimboeuf <jpoimboe@kernel.org> wrote:

> > I guess I'm not fully understanding what the cond rescheds are for. But
> > would an IPI to all CPUs setting NEED_RESCHED, fix it?  
> 
> If all livepatch arches had the ORC unwinder, yes.
> 
> The problem is that frame pointer (and similar) unwinders can't reliably
> unwind past an interrupt frame.

Perhaps we can use this to push those archs with bad unwinders to port over
ORC unwinding ;-)

-- Steve
  

Patch

diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
index 9b9b38e89563..293e29960c6e 100644
--- a/include/linux/livepatch.h
+++ b/include/linux/livepatch.h
@@ -13,7 +13,6 @@ 
 #include <linux/ftrace.h>
 #include <linux/completion.h>
 #include <linux/list.h>
-#include <linux/livepatch_sched.h>
 
 #if IS_ENABLED(CONFIG_LIVEPATCH)
 
diff --git a/include/linux/livepatch_sched.h b/include/linux/livepatch_sched.h
deleted file mode 100644
index 013794fb5da0..000000000000
--- a/include/linux/livepatch_sched.h
+++ /dev/null
@@ -1,29 +0,0 @@ 
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _LINUX_LIVEPATCH_SCHED_H_
-#define _LINUX_LIVEPATCH_SCHED_H_
-
-#include <linux/jump_label.h>
-#include <linux/static_call_types.h>
-
-#ifdef CONFIG_LIVEPATCH
-
-void __klp_sched_try_switch(void);
-
-#if !defined(CONFIG_PREEMPT_DYNAMIC) || !defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
-
-DECLARE_STATIC_KEY_FALSE(klp_sched_try_switch_key);
-
-static __always_inline void klp_sched_try_switch(void)
-{
-	if (static_branch_unlikely(&klp_sched_try_switch_key))
-		__klp_sched_try_switch();
-}
-
-#endif /* !CONFIG_PREEMPT_DYNAMIC || !CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
-
-#else /* !CONFIG_LIVEPATCH */
-static inline void klp_sched_try_switch(void) {}
-static inline void __klp_sched_try_switch(void) {}
-#endif /* CONFIG_LIVEPATCH */
-
-#endif /* _LINUX_LIVEPATCH_SCHED_H_ */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5bdf80136e42..c5b0ef1ecfe4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -36,7 +36,6 @@ 
 #include <linux/seqlock.h>
 #include <linux/kcsan.h>
 #include <linux/rv.h>
-#include <linux/livepatch_sched.h>
 #include <asm/kmap_size.h>
 
 /* task_struct member predeclarations (sorted alphabetically): */
@@ -2087,9 +2086,6 @@  extern int __cond_resched(void);
 
 #if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
 
-void sched_dynamic_klp_enable(void);
-void sched_dynamic_klp_disable(void);
-
 DECLARE_STATIC_CALL(cond_resched, __cond_resched);
 
 static __always_inline int _cond_resched(void)
@@ -2098,7 +2094,6 @@  static __always_inline int _cond_resched(void)
 }
 
 #elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
-
 extern int dynamic_cond_resched(void);
 
 static __always_inline int _cond_resched(void)
@@ -2106,25 +2101,20 @@  static __always_inline int _cond_resched(void)
 	return dynamic_cond_resched();
 }
 
-#else /* !CONFIG_PREEMPTION */
+#else
 
 static inline int _cond_resched(void)
 {
-	klp_sched_try_switch();
 	return __cond_resched();
 }
 
-#endif /* PREEMPT_DYNAMIC && CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
+#endif /* CONFIG_PREEMPT_DYNAMIC */
 
-#else /* CONFIG_PREEMPTION && !CONFIG_PREEMPT_DYNAMIC */
+#else
 
-static inline int _cond_resched(void)
-{
-	klp_sched_try_switch();
-	return 0;
-}
+static inline int _cond_resched(void) { return 0; }
 
-#endif /* !CONFIG_PREEMPTION || CONFIG_PREEMPT_DYNAMIC */
+#endif /* !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) */
 
 #define cond_resched() ({			\
 	__might_resched(__FILE__, __LINE__, 0);	\
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 61328328c474..fc851455740c 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -33,7 +33,6 @@ 
  *
  * - klp_ftrace_handler()
  * - klp_update_patch_state()
- * - __klp_sched_try_switch()
  */
 DEFINE_MUTEX(klp_mutex);
 
diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
index e54c3d60a904..70bc38f27af7 100644
--- a/kernel/livepatch/transition.c
+++ b/kernel/livepatch/transition.c
@@ -9,7 +9,6 @@ 
 
 #include <linux/cpu.h>
 #include <linux/stacktrace.h>
-#include <linux/static_call.h>
 #include "core.h"
 #include "patch.h"
 #include "transition.h"
@@ -27,25 +26,6 @@  static int klp_target_state = KLP_UNDEFINED;
 
 static unsigned int klp_signals_cnt;
 
-/*
- * When a livepatch is in progress, enable klp stack checking in
- * cond_resched().  This helps CPU-bound kthreads get patched.
- */
-#if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
-
-#define klp_cond_resched_enable() sched_dynamic_klp_enable()
-#define klp_cond_resched_disable() sched_dynamic_klp_disable()
-
-#else /* !CONFIG_PREEMPT_DYNAMIC || !CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
-
-DEFINE_STATIC_KEY_FALSE(klp_sched_try_switch_key);
-EXPORT_SYMBOL(klp_sched_try_switch_key);
-
-#define klp_cond_resched_enable() static_branch_enable(&klp_sched_try_switch_key)
-#define klp_cond_resched_disable() static_branch_disable(&klp_sched_try_switch_key)
-
-#endif /* CONFIG_PREEMPT_DYNAMIC && CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
-
 /*
  * This work can be performed periodically to finish patching or unpatching any
  * "straggler" tasks which failed to transition in the first attempt.
@@ -194,8 +174,8 @@  void klp_update_patch_state(struct task_struct *task)
 	 * barrier (smp_rmb) for two cases:
 	 *
 	 * 1) Enforce the order of the TIF_PATCH_PENDING read and the
-	 *    klp_target_state read.  The corresponding write barriers are in
-	 *    klp_init_transition() and klp_reverse_transition().
+	 *    klp_target_state read.  The corresponding write barrier is in
+	 *    klp_init_transition().
 	 *
 	 * 2) Enforce the order of the TIF_PATCH_PENDING read and a future read
 	 *    of func->transition, if klp_ftrace_handler() is called later on
@@ -363,44 +343,6 @@  static bool klp_try_switch_task(struct task_struct *task)
 	return !ret;
 }
 
-void __klp_sched_try_switch(void)
-{
-	if (likely(!klp_patch_pending(current)))
-		return;
-
-	/*
-	 * This function is called from cond_resched() which is called in many
-	 * places throughout the kernel.  Using the klp_mutex here might
-	 * deadlock.
-	 *
-	 * Instead, disable preemption to prevent racing with other callers of
-	 * klp_try_switch_task().  Thanks to task_call_func() they won't be
-	 * able to switch this task while it's running.
-	 */
-	preempt_disable();
-
-	/*
-	 * Make sure current didn't get patched between the above check and
-	 * preempt_disable().
-	 */
-	if (unlikely(!klp_patch_pending(current)))
-		goto out;
-
-	/*
-	 * Enforce the order of the TIF_PATCH_PENDING read above and the
-	 * klp_target_state read in klp_try_switch_task().  The corresponding
-	 * write barriers are in klp_init_transition() and
-	 * klp_reverse_transition().
-	 */
-	smp_rmb();
-
-	klp_try_switch_task(current);
-
-out:
-	preempt_enable();
-}
-EXPORT_SYMBOL(__klp_sched_try_switch);
-
 /*
  * Sends a fake signal to all non-kthread tasks with TIF_PATCH_PENDING set.
  * Kthreads with TIF_PATCH_PENDING set are woken up.
@@ -507,8 +449,7 @@  void klp_try_complete_transition(void)
 		return;
 	}
 
-	/* Done!  Now cleanup the data structures. */
-	klp_cond_resched_disable();
+	/* we're done, now cleanup the data structures */
 	patch = klp_transition_patch;
 	klp_complete_transition();
 
@@ -560,8 +501,6 @@  void klp_start_transition(void)
 			set_tsk_thread_flag(task, TIF_PATCH_PENDING);
 	}
 
-	klp_cond_resched_enable();
-
 	klp_signals_cnt = 0;
 }
 
@@ -617,9 +556,8 @@  void klp_init_transition(struct klp_patch *patch, int state)
 	 * see a func in transition with a task->patch_state of KLP_UNDEFINED.
 	 *
 	 * Also enforce the order of the klp_target_state write and future
-	 * TIF_PATCH_PENDING writes to ensure klp_update_patch_state() and
-	 * __klp_sched_try_switch() don't set a task->patch_state to
-	 * KLP_UNDEFINED.
+	 * TIF_PATCH_PENDING writes to ensure klp_update_patch_state() doesn't
+	 * set a task->patch_state to KLP_UNDEFINED.
 	 */
 	smp_wmb();
 
@@ -655,10 +593,14 @@  void klp_reverse_transition(void)
 		 klp_target_state == KLP_PATCHED ? "patching to unpatching" :
 						   "unpatching to patching");
 
+	klp_transition_patch->enabled = !klp_transition_patch->enabled;
+
+	klp_target_state = !klp_target_state;
+
 	/*
 	 * Clear all TIF_PATCH_PENDING flags to prevent races caused by
-	 * klp_update_patch_state() or __klp_sched_try_switch() running in
-	 * parallel with the reverse transition.
+	 * klp_update_patch_state() running in parallel with
+	 * klp_start_transition().
 	 */
 	read_lock(&tasklist_lock);
 	for_each_process_thread(g, task)
@@ -668,28 +610,9 @@  void klp_reverse_transition(void)
 	for_each_possible_cpu(cpu)
 		clear_tsk_thread_flag(idle_task(cpu), TIF_PATCH_PENDING);
 
-	/*
-	 * Make sure all existing invocations of klp_update_patch_state() and
-	 * __klp_sched_try_switch() see the cleared TIF_PATCH_PENDING before
-	 * starting the reverse transition.
-	 */
+	/* Let any remaining calls to klp_update_patch_state() complete */
 	klp_synchronize_transition();
 
-	/*
-	 * All patching has stopped, now re-initialize the global variables to
-	 * prepare for the reverse transition.
-	 */
-	klp_transition_patch->enabled = !klp_transition_patch->enabled;
-	klp_target_state = !klp_target_state;
-
-	/*
-	 * Enforce the order of the klp_target_state write and the
-	 * TIF_PATCH_PENDING writes in klp_start_transition() to ensure
-	 * klp_update_patch_state() and __klp_sched_try_switch() don't set
-	 * task->patch_state to the wrong value.
-	 */
-	smp_wmb();
-
 	klp_start_transition();
 }
 
@@ -703,9 +626,9 @@  void klp_copy_process(struct task_struct *child)
 	 * the task flag up to date with the parent here.
 	 *
 	 * The operation is serialized against all klp_*_transition()
-	 * operations by the tasklist_lock. The only exceptions are
-	 * klp_update_patch_state(current) and __klp_sched_try_switch(), but we
-	 * cannot race with them because we are current.
+	 * operations by the tasklist_lock. The only exception is
+	 * klp_update_patch_state(current), but we cannot race with
+	 * that because we are current.
 	 */
 	if (test_tsk_thread_flag(current, TIF_PATCH_PENDING))
 		set_tsk_thread_flag(child, TIF_PATCH_PENDING);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0e8764d63041..b43fda3c5733 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8597,7 +8597,6 @@  EXPORT_STATIC_CALL_TRAMP(might_resched);
 static DEFINE_STATIC_KEY_FALSE(sk_dynamic_cond_resched);
 int __sched dynamic_cond_resched(void)
 {
-	klp_sched_try_switch();
 	if (!static_branch_unlikely(&sk_dynamic_cond_resched))
 		return 0;
 	return __cond_resched();
@@ -8746,17 +8745,13 @@  int sched_dynamic_mode(const char *str)
 #error "Unsupported PREEMPT_DYNAMIC mechanism"
 #endif
 
-DEFINE_MUTEX(sched_dynamic_mutex);
-static bool klp_override;
-
-static void __sched_dynamic_update(int mode)
+void sched_dynamic_update(int mode)
 {
 	/*
 	 * Avoid {NONE,VOLUNTARY} -> FULL transitions from ever ending up in
 	 * the ZERO state, which is invalid.
 	 */
-	if (!klp_override)
-		preempt_dynamic_enable(cond_resched);
+	preempt_dynamic_enable(cond_resched);
 	preempt_dynamic_enable(might_resched);
 	preempt_dynamic_enable(preempt_schedule);
 	preempt_dynamic_enable(preempt_schedule_notrace);
@@ -8764,79 +8759,36 @@  static void __sched_dynamic_update(int mode)
 
 	switch (mode) {
 	case preempt_dynamic_none:
-		if (!klp_override)
-			preempt_dynamic_enable(cond_resched);
+		preempt_dynamic_enable(cond_resched);
 		preempt_dynamic_disable(might_resched);
 		preempt_dynamic_disable(preempt_schedule);
 		preempt_dynamic_disable(preempt_schedule_notrace);
 		preempt_dynamic_disable(irqentry_exit_cond_resched);
-		if (mode != preempt_dynamic_mode)
-			pr_info("Dynamic Preempt: none\n");
+		pr_info("Dynamic Preempt: none\n");
 		break;
 
 	case preempt_dynamic_voluntary:
-		if (!klp_override)
-			preempt_dynamic_enable(cond_resched);
+		preempt_dynamic_enable(cond_resched);
 		preempt_dynamic_enable(might_resched);
 		preempt_dynamic_disable(preempt_schedule);
 		preempt_dynamic_disable(preempt_schedule_notrace);
 		preempt_dynamic_disable(irqentry_exit_cond_resched);
-		if (mode != preempt_dynamic_mode)
-			pr_info("Dynamic Preempt: voluntary\n");
+		pr_info("Dynamic Preempt: voluntary\n");
 		break;
 
 	case preempt_dynamic_full:
-		if (!klp_override)
-			preempt_dynamic_disable(cond_resched);
+		preempt_dynamic_disable(cond_resched);
 		preempt_dynamic_disable(might_resched);
 		preempt_dynamic_enable(preempt_schedule);
 		preempt_dynamic_enable(preempt_schedule_notrace);
 		preempt_dynamic_enable(irqentry_exit_cond_resched);
-		if (mode != preempt_dynamic_mode)
-			pr_info("Dynamic Preempt: full\n");
+		pr_info("Dynamic Preempt: full\n");
 		break;
 	}
 
 	preempt_dynamic_mode = mode;
 }
 
-void sched_dynamic_update(int mode)
-{
-	mutex_lock(&sched_dynamic_mutex);
-	__sched_dynamic_update(mode);
-	mutex_unlock(&sched_dynamic_mutex);
-}
-
-#ifdef CONFIG_HAVE_PREEMPT_DYNAMIC_CALL
-
-static int klp_cond_resched(void)
-{
-	__klp_sched_try_switch();
-	return __cond_resched();
-}
-
-void sched_dynamic_klp_enable(void)
-{
-	mutex_lock(&sched_dynamic_mutex);
-
-	klp_override = true;
-	static_call_update(cond_resched, klp_cond_resched);
-
-	mutex_unlock(&sched_dynamic_mutex);
-}
-
-void sched_dynamic_klp_disable(void)
-{
-	mutex_lock(&sched_dynamic_mutex);
-
-	klp_override = false;
-	__sched_dynamic_update(preempt_dynamic_mode);
-
-	mutex_unlock(&sched_dynamic_mutex);
-}
-
-#endif /* CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
-
 static int __init setup_preempt_mode(char *str)
 {
 	int mode = sched_dynamic_mode(str);