[printk,v1,11/18] printk: nobkl: Introduce printer threads

Message ID 20230302195618.156940-12-john.ogness@linutronix.de
State New
Headers
Series threaded/atomic console support |

Commit Message

John Ogness March 2, 2023, 7:56 p.m. UTC
  From: Thomas Gleixner <tglx@linutronix.de>

Add the infrastructure to create a printer thread per console along
with the required thread function, which is takeover/handover aware.

Co-developed-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Thomas Gleixner (Intel) <tglx@linutronix.de>
---
 include/linux/console.h      |  11 ++
 kernel/printk/internal.h     |  54 ++++++++
 kernel/printk/printk.c       |  52 ++-----
 kernel/printk/printk_nobkl.c | 259 ++++++++++++++++++++++++++++++++++-
 4 files changed, 336 insertions(+), 40 deletions(-)
  

Comments

kernel test robot March 3, 2023, 1:23 a.m. UTC | #1
Hi John,

I love your patch! Yet something to improve:

[auto build test ERROR on 10d639febe5629687dac17c4a7500a96537ce11a]

url:    https://github.com/intel-lab-lkp/linux/commits/John-Ogness/kdb-do-not-assume-write-callback-available/20230303-040039
base:   10d639febe5629687dac17c4a7500a96537ce11a
patch link:    https://lore.kernel.org/r/20230302195618.156940-12-john.ogness%40linutronix.de
patch subject: [PATCH printk v1 11/18] printk: nobkl: Introduce printer threads
config: nios2-buildonly-randconfig-r004-20230302 (https://download.01.org/0day-ci/archive/20230303/202303030957.Hkt9zcFz-lkp@intel.com/config)
compiler: nios2-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/72ef8a364036e7e813e7f7dfa8d37a4466d1ca8a
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review John-Ogness/kdb-do-not-assume-write-callback-available/20230303-040039
        git checkout 72ef8a364036e7e813e7f7dfa8d37a4466d1ca8a
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=nios2 olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=nios2 SHELL=/bin/bash kernel/printk/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202303030957.Hkt9zcFz-lkp@intel.com/

All errors (new ones prefixed by >>):

   kernel/printk/printk.c:2802:6: warning: no previous prototype for 'printk_get_next_message' [-Wmissing-prototypes]
    2802 | bool printk_get_next_message(struct printk_message *pmsg, u64 seq,
         |      ^~~~~~~~~~~~~~~~~~~~~~~
   kernel/printk/printk.c: In function 'console_flush_all':
>> kernel/printk/printk.c:2979:30: error: implicit declaration of function 'console_is_usable'; did you mean 'console_exit_unsafe'? [-Werror=implicit-function-declaration]
    2979 |                         if (!console_is_usable(con, flags))
         |                              ^~~~~~~~~~~~~~~~~
         |                              console_exit_unsafe
   cc1: some warnings being treated as errors


vim +2979 kernel/printk/printk.c

a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2933  
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2934  /*
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2935   * Print out all remaining records to all consoles.
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2936   *
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2937   * @do_cond_resched is set by the caller. It can be true only in schedulable
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2938   * context.
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2939   *
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2940   * @next_seq is set to the sequence number after the last available record.
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2941   * The value is valid only when this function returns true. It means that all
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2942   * usable consoles are completely flushed.
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2943   *
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2944   * @handover will be set to true if a printk waiter has taken over the
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2945   * console_lock, in which case the caller is no longer holding the
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2946   * console_lock. Otherwise it is set to false.
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2947   *
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2948   * Returns true when there was at least one usable console and all messages
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2949   * were flushed to all usable consoles. A returned false informs the caller
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2950   * that everything was not flushed (either there were no usable consoles or
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2951   * another context has taken over printing or it is a panic situation and this
5831788afb17b89 kernel/printk/printk.c Petr Mladek             2022-06-23  2952   * is not the panic CPU). Regardless the reason, the caller should assume it
5831788afb17b89 kernel/printk/printk.c Petr Mladek             2022-06-23  2953   * is not useful to immediately try again.
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2954   *
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2955   * Requires the console_lock.
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2956   */
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2957  static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover)
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2958  {
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2959  	bool any_usable = false;
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2960  	struct console *con;
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2961  	bool any_progress;
fc956ae0de7fa25 kernel/printk/printk.c John Ogness             2022-11-16  2962  	int cookie;
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2963  
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2964  	*next_seq = 0;
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2965  	*handover = false;
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2966  
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2967  	do {
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2968  		any_progress = false;
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2969  
fc956ae0de7fa25 kernel/printk/printk.c John Ogness             2022-11-16  2970  		cookie = console_srcu_read_lock();
fc956ae0de7fa25 kernel/printk/printk.c John Ogness             2022-11-16  2971  		for_each_console_srcu(con) {
cfa886eee9834d5 kernel/printk/printk.c Thomas Gleixner         2023-03-02  2972  			short flags = console_srcu_read_flags(con);
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2973  			bool progress;
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2974  
cfa886eee9834d5 kernel/printk/printk.c Thomas Gleixner         2023-03-02  2975  			/* console_flush_all() is only for legacy consoles. */
cfa886eee9834d5 kernel/printk/printk.c Thomas Gleixner         2023-03-02  2976  			if (flags & CON_NO_BKL)
cfa886eee9834d5 kernel/printk/printk.c Thomas Gleixner         2023-03-02  2977  				continue;
cfa886eee9834d5 kernel/printk/printk.c Thomas Gleixner         2023-03-02  2978  
cfa886eee9834d5 kernel/printk/printk.c Thomas Gleixner         2023-03-02 @2979  			if (!console_is_usable(con, flags))
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2980  				continue;
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2981  			any_usable = true;
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2982  
daaab5b5bba36a5 kernel/printk/printk.c John Ogness             2023-01-09  2983  			progress = console_emit_next_record(con, handover, cookie);
fc956ae0de7fa25 kernel/printk/printk.c John Ogness             2022-11-16  2984  
fc956ae0de7fa25 kernel/printk/printk.c John Ogness             2022-11-16  2985  			/*
fc956ae0de7fa25 kernel/printk/printk.c John Ogness             2022-11-16  2986  			 * If a handover has occurred, the SRCU read lock
fc956ae0de7fa25 kernel/printk/printk.c John Ogness             2022-11-16  2987  			 * is already released.
fc956ae0de7fa25 kernel/printk/printk.c John Ogness             2022-11-16  2988  			 */
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2989  			if (*handover)
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2990  				return false;
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2991  
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2992  			/* Track the next of the highest seq flushed. */
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2993  			if (con->seq > *next_seq)
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2994  				*next_seq = con->seq;
8d91f8b15361dfb kernel/printk/printk.c Tejun Heo               2016-01-15  2995  
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2996  			if (!progress)
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2997  				continue;
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2998  			any_progress = true;
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  2999  
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  3000  			/* Allow panic_cpu to take over the consoles safely. */
8ebc476fd51e6c0 kernel/printk/printk.c Stephen Brennan         2022-02-02  3001  			if (abandon_console_lock_in_panic())
fc956ae0de7fa25 kernel/printk/printk.c John Ogness             2022-11-16  3002  				goto abandon;
8ebc476fd51e6c0 kernel/printk/printk.c Stephen Brennan         2022-02-02  3003  
8d91f8b15361dfb kernel/printk/printk.c Tejun Heo               2016-01-15  3004  			if (do_cond_resched)
8d91f8b15361dfb kernel/printk/printk.c Tejun Heo               2016-01-15  3005  				cond_resched();
^1da177e4c3f415 kernel/printk.c        Linus Torvalds          2005-04-16  3006  		}
fc956ae0de7fa25 kernel/printk/printk.c John Ogness             2022-11-16  3007  		console_srcu_read_unlock(cookie);
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  3008  	} while (any_progress);
dbdda842fe96f89 kernel/printk/printk.c Steven Rostedt (VMware  2018-01-10  3009) 
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  3010  	return any_usable;
fc956ae0de7fa25 kernel/printk/printk.c John Ogness             2022-11-16  3011  
fc956ae0de7fa25 kernel/printk/printk.c John Ogness             2022-11-16  3012  abandon:
fc956ae0de7fa25 kernel/printk/printk.c John Ogness             2022-11-16  3013  	console_srcu_read_unlock(cookie);
fc956ae0de7fa25 kernel/printk/printk.c John Ogness             2022-11-16  3014  	return false;
a699449bb13b70b kernel/printk/printk.c John Ogness             2022-04-21  3015  }
fe3d8ad31cf51b0 kernel/printk.c        Feng Tang               2011-03-22  3016
  
John Ogness March 3, 2023, 10:56 a.m. UTC | #2
On 2023-03-03, kernel test robot <lkp@intel.com> wrote:
>    kernel/printk/printk.c: In function 'console_flush_all':
>>> kernel/printk/printk.c:2979:30: error: implicit declaration of function 'console_is_usable'; did you mean 'console_exit_unsafe'? [-Werror=implicit-function-declaration]
>     2979 |                         if (!console_is_usable(con, flags))
>          |                              ^~~~~~~~~~~~~~~~~
>          |                              console_exit_unsafe

This macro needs to be defined for !CONFIG_PRINTK as well.

diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index 60d6bf18247e..e4fb600daf06 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -147,6 +147,7 @@ static inline void cons_kthread_create(struct console *con) { }
 static inline bool printk_percpu_data_ready(void) { return false; }
 static inline bool cons_nobkl_init(struct console *con) { return true; }
 static inline void cons_nobkl_cleanup(struct console *con) { }
+static inline bool console_is_usable(struct console *con, short flags) { return false; }
 
 #endif /* CONFIG_PRINTK */
  
Petr Mladek April 5, 2023, 10:48 a.m. UTC | #3
On Thu 2023-03-02 21:02:11, John Ogness wrote:
> From: Thomas Gleixner <tglx@linutronix.de>
> 
> Add the infrastructure to create a printer thread per console along
> with the required thread function, which is takeover/handover aware.
> 
> --- a/kernel/printk/internal.h
> +++ b/kernel/printk/internal.h
> @@ -75,6 +77,55 @@ u64 cons_read_seq(struct console *con);
>  void cons_nobkl_cleanup(struct console *con);
>  bool cons_nobkl_init(struct console *con);
>  bool cons_alloc_percpu_data(struct console *con);
> +void cons_kthread_create(struct console *con);
> +
> +/*
> + * Check if the given console is currently capable and allowed to print
> + * records. If the caller only works with certain types of consoles, the
> + * caller is responsible for checking the console type before calling
> + * this function.
> + */
> +static inline bool console_is_usable(struct console *con, short flags)
> +{
> +	if (!(flags & CON_ENABLED))
> +		return false;
> +
> +	if ((flags & CON_SUSPENDED))
> +		return false;
> +
> +	/*
> +	 * The usability of a console varies depending on whether
> +	 * it is a NOBKL console or not.
> +	 */
> +
> +	if (flags & CON_NO_BKL) {
> +		if (have_boot_console)
> +			return false;

I am not sure if this is the right place to discuss it.
Different patches add pieces that are part of the puzzle.

Anyway, how are the NOBKL consoles supposed to work when a boot console
is still registered, please?

I see that a later patch adds:

asmlinkage int vprintk_emit(int facility, int level,
			    const struct dev_printk_info *dev_info,
			    const char *fmt, va_list args)
{
[...]
	/*
	 * Flush the non-BKL consoles. This only leads to direct atomic
	 * printing for non-BKL consoles that do not have a printer
	 * thread available. Otherwise the printer thread will perform
	 * the printing.
	 */
	cons_atomic_flush(&wctxt, true);
[...]
}

This patch adds cons_kthread_create(). And it refuses to create
the kthread as long as there is a boot console registered.

Finally, a later added cons_atomic_flush() ignores consoles where
console_is_usable() returns false:

void cons_atomic_flush(struct cons_write_context *printk_caller_wctxt, bool skip_unsafe)
{
[...]
	for_each_console_srcu(con) {
[...]
		if (!console_is_usable(con, flags))
			continue;

It looks to me that NOBKL consoles will not show anything as long as
a boot console is registered.

And the boot console might never be removed when "keep_bootcon"
parameter is used.


Sigh, this looks like a non-trivial problem. I see it as a combination
of two things:

   + NOBKL consoles are independent. And this is actually one
     big feature.

   + There is no 1:1 relation between boot and real console using
     the same output device (serial port). I mean that
     register_console() is not able to match which real console
     is replacing a particular boot console.

As a result, we could not easily synchronize boot and the related real
console against each other.

I see three possible solutions:

A) Ignore this problem. People are most likely using only one boot
   console. And the real console will get enabled "immediately"
   after this console gets removed. So that there should not be
   any gap.

   The only problem is that people use more real consoles. And
   also unrelated real consoles will not see anything.

   I guess that people might notice that they do not see anything
   on ttyX console until ttySX replaces an early serial console.
   And they might consider this as a regression.


B) Allow to match boot and real consoles using the same output device
   and properly synchronize them against each other.

   It might mean:

       + sharing the same atomic lock (atomic_state)
       + sharing the same device (port) lock
       + avoid running both at the same time by a careful
	 switch during the registration of the real console

    , where sharing the same port lock might theoretically be done
    without 1:1 matching of the related console drivers. They
    would use the same port->lock spin_lock.

    This might also fix the ugly race during console registration
    when we unregister the boot console too early or too late.
    The switch between a boot and the related real console might be
    always smooth.

    The problem is that it might be pretty complicated to achieve
    this.


C) Synchronize also NOBKL consoles using console_sem until
   all boot consoles are removed and kthreads started.

   It might actually be pretty easy. It might be enough to
   move cons_flush_all() from vprintk_emit() into
   console_flush_all() that is called under console_lock().

   I think that we need to keep cons_flush_all() in vprintk_emit()
   to emit the messages directly in EMERGENCY and PANIC modes.
   But we do not need or must not call it there when there is
   still a boot console. We would know that it will called later
   from console_unlock() in this case.


My preferences:

   + A probably is not acceptable. It would make me feel very
     uncomfortable, definitely.

   + B looks like the best solution but it might be very hard to achieve.

   + C seems to be good enough for now.

I think that C is the only realistic way to go unless there is another
reasonable solution.

Best Regards,
Petr
  
Petr Mladek April 6, 2023, 8:09 a.m. UTC | #4
On Thu 2023-03-02 21:02:11, John Ogness wrote:
> From: Thomas Gleixner <tglx@linutronix.de>
> 
> Add the infrastructure to create a printer thread per console along
> with the required thread function, which is takeover/handover aware.
>
> --- a/kernel/printk/printk_nobkl.c
> +++ b/kernel/printk/printk_nobkl.c
> +/**
> + * cons_kthread_func - The printk thread function
> + * @__console:	Console to operate on
> + */
> +static int cons_kthread_func(void *__console)
> +{
> +	struct console *con = __console;
> +	struct cons_write_context wctxt = {
> +		.ctxt.console	= con,
> +		.ctxt.prio	= CONS_PRIO_NORMAL,
> +		.ctxt.thread	= 1,
> +	};
> +	struct cons_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
> +	unsigned long flags;
> +	short con_flags;
> +	bool backlog;
> +	int cookie;
> +	int ret;
> +
> +	for (;;) {
> +		atomic_inc(&con->kthread_waiting);

Sigh, I really have a hard time wrapping my head around the barriers
here. This part looks fine. The rcuwait_wait_event() provides full
barrier before checking the "condition".

But I am not sure about the counter part. It is in another patch.
IMHO, there should be a full barrier before checking
con->kthread_waiting. Something like this:

+  void cons_wake_threads(void)
+ {
+ 	struct console *con;
+ 	int cookie;
+

	/*
	 * Full barrier against rcuwait_wait_event() in	cons_kthread_func().
	 *
	 * The purpose of this barrier is to make sure that the new record is
	 * stored before checking con->kthread_waiting.
	 *
	 * It has the same purpose as the full barrier in rcuwait_wake_up().
	 * It makes sure that cons_kthread_should_wakeup() sees the new record
	 * before going into sleep in rcuwait_wait_event().
	 *
	 * The extra barrier is needed here because rcuwait_wake_up() is called
	 * only when we see con->kthread_waiting set. We need to make sure
	 * that either we see con->kthread_waiting or cons_kthread_func()
	 * will see the new record when checking the condition in
	 * rcuwait_wait_event().
	 */
	smp_mb();

+ 	cookie = console_srcu_read_lock();
+ 	for_each_console_srcu(con) {
+ 		if (con->kthread && atomic_read(&con->kthread_waiting))
+ 			irq_work_queue(&con->irq_work);
+ 	}
+ 	console_srcu_read_unlock(cookie);
+ }

I think that I am right. But I am not in a good "see-barriers" mood so
I also might be wrong.

> +
> +		/*
> +		 * Provides a full memory barrier vs. cons_kthread_wake().
> +		 */
> +		ret = rcuwait_wait_event(&con->rcuwait,
> +					 cons_kthread_should_wakeup(con, ctxt),
> +					 TASK_INTERRUPTIBLE);

I am sorry but I would need some explanation for this. I am not
familiar with the rcuwait API. I looked at the code, commit messages,
and various users, and I am still not sure.

My assumption is that this allows to wait for an event on "con"
when the lifetime of this structure is synchronized using SRCU.
The counter-part calls rcuwait_wake_up() under srcu_read_lock().

I am afraid that it is more complicated in our case.
We do not call rcuwait_wake_up() under srcu_read_lock().
We call it from an irq_work() that might be processed later
after srcu_read_unlock().

IMHO, we need to make sure that there is no pending irq_work
and nobody could create a new one after exit from
unregister_console(). There seems to be irq_work_sync()
for this purpose.

> +
> +		atomic_dec(&con->kthread_waiting);
> +
> +		if (kthread_should_stop())
> +			break;
> +
> +		/* Wait was interrupted by a spurious signal, go back to sleep */
> +		if (ret)
> +			continue;
> +
> +		for (;;) {
[...]
> +
> +			if (console_is_usable(con, con_flags)) {
> +				/*
> +				 * If the emit fails, this context is no
> +				 * longer the owner. Abort the processing and
> +				 * wait for new records to print.
> +				 */
> +				if (!cons_emit_record(&wctxt))
> +					break;
> +
> +				backlog = ctxt->backlog;
> +			} else {
> +				backlog = false;
> +			}
[...]
> +	}
> +	return 0;
> +}
> +

Best Regards,
Petr
  
Petr Mladek April 6, 2023, 9:46 a.m. UTC | #5
On Thu 2023-03-02 21:02:11, John Ogness wrote:
> From: Thomas Gleixner <tglx@linutronix.de>
> 
> Add the infrastructure to create a printer thread per console along
> with the required thread function, which is takeover/handover aware.

> --- a/kernel/printk/printk_nobkl.c
> +++ b/kernel/printk/printk_nobkl.c
> +/**
> + * cons_kthread_func - The printk thread function
> + * @__console:	Console to operate on
> + */
> +static int cons_kthread_func(void *__console)
> +{
> +	struct console *con = __console;
> +	struct cons_write_context wctxt = {
> +		.ctxt.console	= con,
> +		.ctxt.prio	= CONS_PRIO_NORMAL,
> +		.ctxt.thread	= 1,
> +	};
> +	struct cons_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
> +	unsigned long flags;
> +	short con_flags;
> +	bool backlog;
> +	int cookie;
> +	int ret;
> +
> +	for (;;) {
> +		atomic_inc(&con->kthread_waiting);
> +
> +		/*
> +		 * Provides a full memory barrier vs. cons_kthread_wake().
> +		 */
> +		ret = rcuwait_wait_event(&con->rcuwait,
> +					 cons_kthread_should_wakeup(con, ctxt),
> +					 TASK_INTERRUPTIBLE);
> +
> +		atomic_dec(&con->kthread_waiting);
> +
> +		if (kthread_should_stop())
> +			break;
> +
> +		/* Wait was interrupted by a spurious signal, go back to sleep */
> +		if (ret)
> +			continue;
> +
> +		for (;;) {
> +			cookie = console_srcu_read_lock();
> +
> +			/*
> +			 * Ensure this stays on the CPU to make handover and
> +			 * takeover possible.
> +			 */
> +			if (con->port_lock)
> +				con->port_lock(con, true, &flags);

IMHO, we should use a more generic name. This should be a lock that
provides full synchronization between con->write() and other
operations on the device used by the console.

"port_lock" is specific for the serial consoles. IMHO, other consoles
might use another lock. IMHO, tty uses "console_lock" internally for
this purpose. netconsole seems to have "target_list_lock" that might
possibly have this purpose, s390 consoles are using sclp_con_lock,
sclp_vt220_lock, or get_ccwdev_lock(raw->cdev).


Honestly, I expected that we could replace these locks by
cons_acquire_lock(). I know that the new lock is special: sleeping,
timeouting, allows hand-over by priorities.

But I think that we might implement cons_acquire_lock() that would always
busy wait without any timeout. And use some "priority" that would
never handover the lock a voluntary way at least not with a voluntary
one. The only difference would be that it is sleeping. But it might
be acceptable in many cases.

Using the new lock instead of port->lock would allow to remove
the tricks with using spin_trylock() when oops_in_progress is set.

That said, I am not sure if this is possible without major changes.
For example, in case of serial consoles, it would require touching
the layer using port->lock.

Also it would require a 1:1 relation between struct console and the output
device lock. I am not sure if it is always the case. On the other
hand, adding some infrastructure for this 1:1 relationship would
help to solve smooth transition from the boot to the real console
driver.


OK, let's first define what the two locks are supposed to synchronize.
My understanding is that this patchset uses them the following way:

    + The new lock (atomic_state) is used to serialize emiting
      messages between different write contexts. It replaces
      the functionality of console_lock.

      It is a per-console sleeping lock, allows voluntary and hostile
      hand-over using priorities and spinning with a timeout.


    + The port_lock is used to synchronize various operations
      of the console driver/device, like probe, init, exit,
      configuration update.

      It is typically a per-console driver/device spin lock.


I guess that we would want to keep both locks:

    + it might help to do the rework manageable

    + the sleeping lock might complicate some operations;
      raw_spin_lock might be necessary at least on
      non-RT system.


Are there better names? What about?

    + emit_lock() or ctxt_lock() for the new special lock

    + device_lock() or driver_lock() as a generic name
      for the driver/device specific lock.

Sigh, the problem with the device_lock()/driver_lock()
is that it might get confused with:

	struct tty_driver	*(*device)(struct console *co, int *index);

It would be really great to make it clear that this callback is about
the connection to the tty layer. I would rename it to:

	struct tty_driver	*(*tty_drv)(struct console *co, int *index);
or
	struct tty_driver	*(*tty_driver)(struct console *co, int *index);


> +			else
> +				migrate_disable();
> +
> +			/*
> +			 * Try to acquire the console without attempting to
> +			 * take over. If an atomic printer wants to hand
> +			 * back to the thread it simply wakes it up.
> +			 */
> +			if (!cons_try_acquire(ctxt))
> +				break;
> +

Best Regards,
Petr
  
Petr Mladek April 6, 2023, 1:19 p.m. UTC | #6
On Thu 2023-03-02 21:02:11, John Ogness wrote:
> From: Thomas Gleixner <tglx@linutronix.de>
> 
> Add the infrastructure to create a printer thread per console along
> with the required thread function, which is takeover/handover aware.

This deserves a more detailed description. It should describe
the expected behavior of the newly added pieces of the puzzle.

> --- a/kernel/printk/printk_nobkl.c
> +++ b/kernel/printk/printk_nobkl.c
> @@ -714,6 +717,14 @@ static bool __cons_try_acquire(struct cons_context *ctxt)
>  		goto success;
>  	}
>  
> +	/*
> +	 * A threaded printer context will never spin or perform a
> +	 * hostile takeover. The atomic writer will wake the thread
> +	 * when it is done with the important output.
> +	 */
> +	if (ctxt->thread)
> +		return false;

I suggest to remove this optimization. Or replace it with a check
of the lowest NORMAL priority.

First, it is conceptually questionable. The kthread might actually want to
take over the lock. It is the preferred context when the system
works properly.

Second, it is a bit superfluous. The kthread will give up on the next check
anyway because it is the context with the lowest NORMAL priority.

I guess that this is another relic of the first POC that allowed
to take over the lock from a context of the same priority.

I thought more about passing the lock:

Passing the lock between console contexts of the same priority would
have basically the same effect as console_trylock_spinning() trick
in the legacy code. The only motivation would be to reduce
the risk of softlockups. But it would make sense only in the EMERGENCY
contexts. There should be only one NORMAL and PANIC contexts.

Also passing the lock between context of the same priority would
be more complicated with the NOBKL consoles. Some messages (parts)
might be printed many times when the lock is passed in the middle
of the record and the new owner always starts from scratch.

> +
>  	/*
>  	 * If the active context is on the same CPU then there is
>  	 * obviously no handshake possible.
> @@ -871,6 +882,9 @@ static bool __cons_release(struct cons_context *ctxt)
>  	return true;
>  }
>

> +static bool printk_threads_enabled __ro_after_init;

This might deserve a comment when exactly it gets enabled.
My understanding is that it is set during a boot phase
when it is safe to create the kthreads.

> +static bool printk_force_atomic __initdata;

I guess that this will be a kernel parameter. But it is not defined in
this patch. The logic should be introduced together with the parameter.

> +
>  /**
>   * cons_release - Release the console after output is done
>   * @ctxt:	The acquire context that contains the state
> @@ -1203,6 +1219,243 @@ static int __maybe_unused cons_emit_record(struct cons_write_context *wctxt)
>  	return cons_seq_try_update(ctxt);
>  }
>  
> +/**
> + * cons_kthread_should_wakeup - Check whether the printk thread should wakeup
> + * @con:	Console to operate on
> + * @ctxt:	The acquire context that contains the state
> + *		at console_acquire()
> + *
> + * Returns: True if the thread should shutdown or if the console is allowed to
> + * print and a record is available. False otherwise
> + *
> + * After the thread wakes up, it must first check if it should shutdown before
> + * attempting any printing.

I would move this comment right above the kthread_should_stop()
check. I think there is a bigger chance it will be seen there.

> + */
> +static bool cons_kthread_should_wakeup(struct console *con, struct cons_context *ctxt)
> +{
> +	bool is_usable;
> +	short flags;
> +	int cookie;
> +
> +	if (kthread_should_stop())
> +		return true;
> +
> +	cookie = console_srcu_read_lock();
> +	flags = console_srcu_read_flags(con);
> +	is_usable = console_is_usable(con, flags);
> +	console_srcu_read_unlock(cookie);
> +
> +	if (!is_usable)
> +		return false;
> +
> +	/* This reads state and sequence on 64bit. On 32bit only state */
> +	cons_state_read(con, CON_STATE_CUR, &ctxt->state);
> +
> +	/*
> +	 * Atomic printing is running on some other CPU. The owner
> +	 * will wake the console thread on unlock if necessary.
> +	 */
> +	if (ctxt->state.locked)
> +		return false;
> +
> +	/* Bring the sequence in @ctxt up to date */
> +	cons_context_set_seq(ctxt);

This name is a bit confusing. It looks like it is setting some state.
But the primary function is to actually read the value.

Also the function sets both "oldseq" and "newseq". This looks superfluous.
The caller will need to refresh the values once again after
cons_try_acquire_lock().

It should be enough to set "oldseq" here and "newseq" in cons_emit_record().

Finally, in an other mail I suggested to move:

    + ctxt.newseq -> xtxt.pmsg.seq
    + ctxt.oldseq -> ctxt.con_seq		// cache of con->seq

What about renaming the function to something like:

    + cons_context_read_con_seq()
    + cons_context_refresh_con_seq()


> +
> +	return prb_read_valid(prb, ctxt->oldseq, NULL);
> +}
> +
> +/**
> + * cons_kthread_func - The printk thread function
> + * @__console:	Console to operate on
> + */
> +static int cons_kthread_func(void *__console)
> +{
> +	struct console *con = __console;
> +	struct cons_write_context wctxt = {
> +		.ctxt.console	= con,
> +		.ctxt.prio	= CONS_PRIO_NORMAL,
> +		.ctxt.thread	= 1,
> +	};
> +	struct cons_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
> +	unsigned long flags;
> +	short con_flags;
> +	bool backlog;
> +	int cookie;
> +	int ret;
> +
> +	for (;;) {
> +		atomic_inc(&con->kthread_waiting);
> +
> +		/*
> +		 * Provides a full memory barrier vs. cons_kthread_wake().
> +		 */
> +		ret = rcuwait_wait_event(&con->rcuwait,
> +					 cons_kthread_should_wakeup(con, ctxt),
> +					 TASK_INTERRUPTIBLE);
> +
> +		atomic_dec(&con->kthread_waiting);
> +
> +		if (kthread_should_stop())
> +			break;
> +
> +		/* Wait was interrupted by a spurious signal, go back to sleep */
> +		if (ret)
> +			continue;
> +
> +		for (;;) {
> +			cookie = console_srcu_read_lock();
> +
> +			/*
> +			 * Ensure this stays on the CPU to make handover and
> +			 * takeover possible.
> +			 */
> +			if (con->port_lock)
> +				con->port_lock(con, true, &flags);
> +			else
> +				migrate_disable();
> +
> +			/*
> +			 * Try to acquire the console without attempting to
> +			 * take over. If an atomic printer wants to hand
> +			 * back to the thread it simply wakes it up.
> +			 */
> +			if (!cons_try_acquire(ctxt))
> +				break;
> +
> +			con_flags = console_srcu_read_flags(con);
> +
> +			if (console_is_usable(con, con_flags)) {
> +				/*
> +				 * If the emit fails, this context is no
> +				 * longer the owner. Abort the processing and
> +				 * wait for new records to print.
> +				 */
> +				if (!cons_emit_record(&wctxt))

Please, rename the function to cons_emit_next_record() to match
the corresponding console_emit_next_record().

> +					break;
> +				backlog = ctxt->backlog;

Also please pass the 3rd possible return state via "handover" variable
to match the semantic of console_emit_next_record().

> +			} else {
> +				backlog = false;
> +			}
> +
> +			/*
> +			 * If the release fails, this context was not the
> +			 * owner. Abort the processing and wait for new
> +			 * records to print.
> +			 */
> +			if (!cons_release(ctxt))
> +				break;
> +
> +			/* Backlog done? */
> +			if (!backlog)
> +				break;
> +
> +			if (con->port_lock)
> +				con->port_lock(con, false, &flags);
> +			else
> +				migrate_enable();
> +
> +			console_srcu_read_unlock(cookie);
> +
> +			cond_resched();
> +		}
> +		if (con->port_lock)
> +			con->port_lock(con, false, &flags);
> +		else
> +			migrate_enable();
> +
> +		console_srcu_read_unlock(cookie);
> +	}
> +	return 0;
> +}
> +
> +/**
> + * cons_kthread_stop - Stop a printk thread
> + * @con:	Console to operate on
> + */
> +static void cons_kthread_stop(struct console *con)
> +{
> +	lockdep_assert_console_list_lock_held();
> +
> +	if (!con->kthread)
> +		return;

We need some tricks here to make sure that cons_kthread_wakeup()
will no longer wake it up:

	con->block_wakeup = true;
	irq_work_sync(&con->irq_work);

> +	kthread_stop(con->kthread);
> +	con->kthread = NULL;
> +
> +	kfree(con->thread_pbufs);
> +	con->thread_pbufs = NULL;
> +}
> +
> +/**
> + * cons_kthread_create - Create a printk thread
> + * @con:	Console to operate on
> + *
> + * If it fails, let the console proceed. The atomic part might
> + * be usable and useful.
> + */
> +void cons_kthread_create(struct console *con)
> +{
> +	struct task_struct *kt;
> +	struct console *c;
> +
> +	lockdep_assert_console_list_lock_held();
> +
> +	if (!(con->flags & CON_NO_BKL) || !con->write_thread)
> +		return;
> +
> +	if (!printk_threads_enabled || con->kthread)
> +		return;
> +
> +	/*
> +	 * Printer threads cannot be started as long as any boot console is
> +	 * registered because there is no way to synchronize the hardware
> +	 * registers between boot console code and regular console code.
> +	 */
> +	for_each_console(c) {
> +		if (c->flags & CON_BOOT)
> +			return;
> +	}
> +	have_boot_console = false;
> +
> +	con->thread_pbufs = kmalloc(sizeof(*con->thread_pbufs), GFP_KERNEL);
> +	if (!con->thread_pbufs) {
> +		con_printk(KERN_ERR, con, "failed to allocate printing thread buffers\n");
> +		return;
> +	}
> +
> +	kt = kthread_run(cons_kthread_func, con, "pr/%s%d", con->name, con->index);
> +	if (IS_ERR(kt)) {
> +		con_printk(KERN_ERR, con, "failed to start printing thread\n");
> +		kfree(con->thread_pbufs);
> +		con->thread_pbufs = NULL;
> +		return;

We should make sure that this console will still get flushed either
in vprintk_emit() or in console_unlock(). I think that it is not
guaranteed by this patchset.

> +	}
> +
> +	con->kthread = kt;
> +
> +	/*
> +	 * It is important that console printing threads are scheduled
> +	 * shortly after a printk call and with generous runtime budgets.
> +	 */
> +	sched_set_normal(con->kthread, -20);
> +}
> +

Best Regards,
Petr
  
Petr Mladek April 13, 2023, 1:28 p.m. UTC | #7
On Thu 2023-03-02 21:02:11, John Ogness wrote:
> From: Thomas Gleixner <tglx@linutronix.de>
> 
> Add the infrastructure to create a printer thread per console along
> with the required thread function, which is takeover/handover aware.
> 

Nit:

Several variable and function name use "thread"

  + con.thread_pbufs
  + con.write_thread()
  + printk_threads_enabled

and many others use "kthread":

  + con.kthread
  + con.kthread_waiting
  + cons_kthread_wake()
  + cons_kthread_create()
  + cons_kthread_should_wakeup()

I do not see any pattern. It would be nice to choose just one variant
for the cons/printk API. I slightly prefer "kthread" but "thread"
would be fine as well.

When we are on the (k)thread naming stuff. We talk about it
historically as a printk (k)thread. But I often feel that it
rather should be a console (k)thread, especially when we have
one per console.

That said, both variants make sense. The thread is used for showing
messages from the printk ring buffer.

Best Regards,
Petr
  
Petr Mladek April 20, 2023, 9:55 a.m. UTC | #8
On Thu 2023-04-06 11:46:37, Petr Mladek wrote:
> On Thu 2023-03-02 21:02:11, John Ogness wrote:
> > From: Thomas Gleixner <tglx@linutronix.de>
> > 
> > Add the infrastructure to create a printer thread per console along
> > with the required thread function, which is takeover/handover aware.
> 
> > --- a/kernel/printk/printk_nobkl.c
> > +++ b/kernel/printk/printk_nobkl.c
> > +/**
> > + * cons_kthread_func - The printk thread function
> > + * @__console:	Console to operate on
> > + */
> > +static int cons_kthread_func(void *__console)
> > +{
> > +	struct console *con = __console;
> > +	struct cons_write_context wctxt = {
> > +		.ctxt.console	= con,
> > +		.ctxt.prio	= CONS_PRIO_NORMAL,
> > +		.ctxt.thread	= 1,
> > +	};
> > +	struct cons_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
> > +	unsigned long flags;
> > +	short con_flags;
> > +	bool backlog;
> > +	int cookie;
> > +	int ret;
> > +
> > +	for (;;) {
> > +		atomic_inc(&con->kthread_waiting);
> > +
> > +		/*
> > +		 * Provides a full memory barrier vs. cons_kthread_wake().
> > +		 */
> > +		ret = rcuwait_wait_event(&con->rcuwait,
> > +					 cons_kthread_should_wakeup(con, ctxt),
> > +					 TASK_INTERRUPTIBLE);
> > +
> > +		atomic_dec(&con->kthread_waiting);
> > +
> > +		if (kthread_should_stop())
> > +			break;
> > +
> > +		/* Wait was interrupted by a spurious signal, go back to sleep */
> > +		if (ret)
> > +			continue;
> > +
> > +		for (;;) {
> > +			cookie = console_srcu_read_lock();
> > +
> > +			/*
> > +			 * Ensure this stays on the CPU to make handover and
> > +			 * takeover possible.
> > +			 */
> > +			if (con->port_lock)
> > +				con->port_lock(con, true, &flags);
> 
> IMHO, we should use a more generic name. This should be a lock that
> provides full synchronization between con->write() and other
> operations on the device used by the console.
> 
> "port_lock" is specific for the serial consoles. IMHO, other consoles
> might use another lock. IMHO, tty uses "console_lock" internally for
> this purpose. netconsole seems to has "target_list_lock" that might
> possible have this purpose, s390 consoles are using sclp_con_lock,
> sclp_vt220_lock, or get_ccwdev_lock(raw->cdev).
> 
> 
> Honestly, I expected that we could replace these locks by
> cons_acquire_lock(). I know that the new lock is special: sleeping,
> timeouting, allows hand-over by priorities.
> 
> But I think that we might implement cons_acquire_lock() that would always
> busy wait without any timeout. And use some "priority" that would
> never handover the lock a voluntary way at least not with a voluntary
> one. The only difference would be that it is sleeping. But it might
> be acceptable in many cases.
> 
> Using the new lock instead of port->lock would allow to remove
> the tricks with using spin_trylock() when oops_in_progress is set.
> 
> That said, I am not sure if this is possible without major changes.
> For example, in case of serial consoles, it would require touching
> the layer using port->lock.
> 
> Also it would requere 1:1 relation between struct console and the output
> device lock. I am not sure if it is always the case. On the other
> hand, adding some infrastructure for this 1:1 relationship would
> help to solve smooth transition from the boot to the real console
> driver.
> 
> 
> OK, let's first define what the two locks are supposed to synchronize.
> My understanding is that this patchset uses them the following way:
> 
>     + The new lock (atomic_state) is used to serialize emiting
>       messages between different write contexts. It replaces
>       the functionality of console_lock.
> 
>       It is a per-console sleeping lock, allows voluntary and hars
>       hand-over using priorities and spinning with a timeout.
> 
> 
>     + The port_lock is used to synchronize various operations
>       of the console driver/device, like probe, init, exit,
>       configuration update.
> 
>       It is typically a per-console driver/device spin lock.
> 
> 
> I guess that we would want to keep both locks:
> 
>     + it might help to do the rework manageable
> 
>     + the sleeping lock might complicate some operations;
>       raw_spin_lock might be necessary at least on
>       non-RT system.

I forgot to check how these two locks are supposed to be used
in write_atomic().

It seems that cons_atomic_flush_con() takes only the new lock
(atomic_state) and ignores the port_lock(). It should be safe
against write_kthread(). But it is not safe against other
operations with the console device that are synchronized
only by the port_lock().

This looks like a potential source of problems and regressions.

Do I miss something, please?
Is there any plan how to deal with this?

Best Regards,
Petr
  
John Ogness April 20, 2023, 10:33 a.m. UTC | #9
On 2023-04-20, Petr Mladek <pmladek@suse.com> wrote:
>> OK, let's first define what the two locks are supposed to synchronize.
>> My understanding is that this patchset uses them the following way:
>> 
>>     + The new lock (atomic_state) is used to serialize emiting
>>       messages between different write contexts. It replaces
>>       the functionality of console_lock.

It replaces the functionality of console_lock, but operates at a finer
level. It is serializing all access to the hardware registers involved
in outputting. For the 8250 driver, this is the IER register.

>>       It is a per-console sleeping lock, allows voluntary and has
>>       hand-over using priorities and spinning with a timeout.

It is not a sleeping lock. It is used as a trylock or spinning with
timeout. It has the special feature that it can be handed over to or
stolen by another context with a higher ownership priority.

>>     + The port_lock is used to synchronize various operations
>>       of the console driver/device, like probe, init, exit,
>>       configuration update.
>> 
>>       It is typically a per-console driver/device spin lock.
>> 
>> 
>> I guess that we would want to keep both locks:

I agree because the port_lock has a much larger scope and is fully
preemptible under PREEMPT_RT.

> I forgot to check how these two locks are supposed to be used
> in write_atomic().
>
> It seems that cons_atomic_flush_con() takes only the new lock
> (atomic_state) and ignores the port_lock(). It should be safe
> against write_kthread(). But it is not safe against other
> operations with the console device that are synchronized
> only by the port_lock().

Yes, it is because the console drivers will also take the atomic_state
lock when needed. You can see this in the POC patch I posted [0].

For example, a new function serial8250_enter_unsafe() is used by the
serial drivers to mark the beginning of an unsafe section. To use this
function, the port_lock must be held. This function additionally takes
the atomic_state lock. Then the driver is allowed to touch hardware
registers related to outputting (IER).

But typically the driver will use a new higher level function, for
example serial8250_in_IER(), which will enter unsafe, read the register,
and exit unsafe. This provides the necessary synchronization against
write_atomic() (for the 8250 driver).

Please also remember that hostile takeovers of drivers in unsafe
sections are done as a last resort in panic, after all other nbcon
consoles have safely flushed their buffers. So we should not spend too
many brain cycles on "what if the atomic_state lock is stolen while in
an unsafe section" questions. The answer is: then you are in "hope and
pray" mode.

John

[0] https://lore.kernel.org/lkml/877cv1geo4.fsf@jogness.linutronix.de
  
Petr Mladek April 20, 2023, 1:33 p.m. UTC | #10
On Thu 2023-04-20 12:39:31, John Ogness wrote:
> On 2023-04-20, Petr Mladek <pmladek@suse.com> wrote:
> >> OK, let's first define what the two locks are supposed to synchronize.
> >> My understanding is that this patchset uses them the following way:
> >> 
> >>     + The new lock (atomic_state) is used to serialize emiting
> >>       messages between different write contexts. It replaces
> >>       the functionality of console_lock.
> 
> It replaces the functionality of console_lock, but operates at a finer
> level. It is serializing all access to the hardware registers involved
> in outputting. For the 8250 driver, this is the IER register.
> 
> >>       It is a per-console sleeping lock, allows voluntary and has
> >>       hand-over using priorities and spinning with a timeout.
> 
> It is not a sleeping lock. It is used as a trylock or spinning with
> timeout. It has the special feature that it can be handed over to or
> stolen by another context with a higher ownership priority.

What prevents the owner from sleeping, please?
Is the disabled preemption enforced?

I see migrate_disable() in con_kthread_func() when con->port_lock
does not exist. It means that the preemption is possible in
this case.

> >>     + The port_lock is used to synchronize various operations
> >>       of the console driver/device, like probe, init, exit,
> >>       configuration update.
> >> 
> >>       It is typically a per-console driver/device spin lock.
> >> 
> >> 
> >> I guess that we would want to keep both locks:
> 
> I agree because the port_lock has a much larger scope and is fully
> preemptive under PREEMPT_RT.

Do you really want to call the entire cons_emit_record()
in non-preemptive context on RT?

It should be enough to disable preemption in the unsafe sections.
IMHO, it might be enabled in the "safe" sections. The only
drawback would be that the emergency context might take over
the lock in the middle of the line. But emergency context should be
rare. So this should be acceptable.


> > I forgot to check how these two locks are supposed to be used
> > in write_atomic().
> >
> > It seems that cons_atomic_flush_con() takes only the new lock
> > (atomic_state) and ignores the port_lock(). It should be safe
> > against write_kthread(). But it is not safe against other
> > operations with the console device that are synchronized
> > only by the port_lock().
> 
> Yes, it is because the console drivers will also take the atomic_state
> lock when needed. You can see this in the POC patch I posted [0].
> 
> For example, a new function serial8250_enter_unsafe() is used by the
> serial drivers to mark the beginning of an unsafe section. To use this
> function, the port_lock must be held. This function additionally takes
> the atomic_state lock. Then the driver is allowed to touch hardware
> registers related to outputting (IER).

I see.

I have missed it because the driver is still taking port->lock
directly on many locations. It seems to be in the init code.
It makes sense because it is called before the port gets
registered as a console.

Sigh, sigh, sigh, this scares me a lot. How do we know that all
code paths are correctly serialized or that they could never
be called in parallel?

Especially, the generic serial8250 API seems to be used in many drivers
together with a lot of custom code.

For example, what about serial8250_handle_irq()? How is it serialized
against serial8250_console_write_atomic()?

> But typically the driver will use a new higher level function, for
> example serial8250_in_IER(), which will enter unsafe, read the register,
> and exit unsafe. This provides the necessary synchronization against
> write_atomic() (for the 8250 driver).

Hmm, I think that we should create an API that would take the right
locks according to the state:

     + port->lock when the console is not registered
     + port->lock + con->atomic_state lock when the console is registered

and use it everywhere instead of taking port->lock directly.

> Please also remember that hostile takeovers of drivers in unsafe
> sections are done as a last resort in panic, after all other nbcon
> consoles have safely flushed their buffers. So we should not spend too
> many brain cycles on "what if the atomic_state lock is stolen while in
> an unsafe section" questions. The answer is: then you are in "hope and
> pray" mode.

I know. And the hostile takeover is not my concern.

My concern are races between write_atomic() in emergency context
and other driver code serialized only by the port->lock.

We need an API that will make sure that any code serialized
by port->lock is properly serialized against write->atomic()
when the console is registered. Also we need to synchronize
the registration and port->lock.

Best Regards,
Petr
  
Petr Mladek April 21, 2023, 4:15 p.m. UTC | #11
On Thu 2023-04-20 15:33:10, Petr Mladek wrote:
> On Thu 2023-04-20 12:39:31, John Ogness wrote:
> I know. And the hostile takeover is not my concern.
> 
> My concern are races between write_atomic() in emergency context
> and other driver code serialized only by the port->lock.
> 
> We need an API that will make sure that any code serialized
> by port->lock is properly serialized against write->atomic()
> when the console is registered.

I though more about it. My idea is the following:

A. The nbcon side might have basically four modes
   for taking the new nbcon lock. It might have four interfaces:

    nbcon_trylock(struct console *con,
		  enum cons_prio prio);
    nbcon_trylock_emergency(struct console *con,
		  enum cons_prio prio);
    nbcon_trylock_panic(struct console *con,
		  enum cons_prio prio);
    nbcon_lock(struct console *con,
		  enum cons_prio prio);

, where

   + nbcon_trylock() would use the current approach for
     the printk kthread. It means that it would try to get
     the lock with a timeout. But it would never try to
     steal the lock.

   + nbcon_trylock_emergency() would use the current approach
     used in emergency context. It would busy wait and
     then try to steal the lock. But it would take over the lock
     only when it is in safe context.

    + nbcon_trylock_panic() would behave the same way as
      nbcon_trylock_emergency(). But it would allow to
      take over the lock even when it is unsafe. It might
      still fail when it is not called on the CPU that
      handles the panic().

    + nbcon_lock() would wait until the lock is really
      available.

  and

  enum cons_prio would be one of the four priorities.

  The API should disable cpu migration to make sure that
  it will stay the same until the lock is released.

  The caller should remember the priority somewhere,
  e.g. in struct cons_ctxt.


B. The port->lock side would switch to the new nbcon lock
   when the console is registered. There are two big questions
   that come to my mind:

  1. The original code does not expect that it might lose
     the lock.

     It should be enough to mark the entire section .unsafe.
     In that case, only the final panic() call might steal
     the lock.


  2. The console registration must be done in a safe way
     to make sure that all callers will use the same
     real lock (port->lock or nbcon_lock).


  IMHO, the uart_port part might look like:

void uart_port_lock_irqsafe(struct uart_port *port,
	int *cookie,
	unsigned long *flags)
{
	struct console *con;

try_again:
	/* Synchronization against console registration. */
	*cookie = console_srcu_read_lock();

	con = rcu_access_pointer(nbcon->cons);

	if (!can_use_nbcon_lock(con)) {
		/* Only the port lock is available. */
		spin_lock_irqsave(&port->lock, *flags);
		port->locked = LOCKED_BY_PORT_LOCK;
		return;
	}


	/*
	 * The nbcon lock is available. Take it instead of
	 * the port->lock. The only exception is when
	 * there is registration in progress. In this case,
	 * port->lock has to be taken as well.
	 *
	 * It will always be taken only with the normal priority
	 * when called from the port lock side.
	 */
	nbcon_lock(con, CON_PRIO_NORMAL);
	local_irq_save(*flags);

	if (cons->registration_in_progress) {
		spin_lock(&port->lock);
		port->locked = LOCKED_BY_BOTH_LOCKS;
	} else {
		port->locked = LOCKED_BY_NBCON_LOCK;
	}

	/*
	 * Makes sure that only nbcon_lock_panic() would
	 * be able to steal this lock.
	 */
	if (!nbcon_enter_unsafe(con, CON_PRIO_NORMAL)) {
		revert locks;
		goto try_again;
	}
}

void uart_port_unlock_irqrestore(struct uart_port *port,
	int *cookie, unsigned long *flags)
{
	struct console *con;

	con = rcu_access_pointer(nbcon->cons);

	switch (port->locked) {
	LOCKED_BY_PORT_LOCK:
		spin_unlock_irqrestore(&port->lock, *flags);
		break;
	LOCKED_BY_BOTH_LOCKS:
		spin_unlock(&port->lock);
		fallthrough;
	LOCKED_BY_NBCON_LOCK:
		nbcon_exit_unsafe(con, CON_PRIO_NORMAL);
		local_irq_restore(*flags);
		nbcon_unlock(con, CON_PRIO_NORMAL);
	};

	console_srcu_unlock(*cookie);
}


and finally the registration:

void register_console(struct console *newcon)
{
[...]
	if (con->flags & CON_NBCON) {
		nbcon_lock(con);
		nbcon->registration_in_progress = true;
		nbcon_unlock(con);

		/*
		 * Make sure that callers are locked by both
		 * nbcon_lock() and port->lock()
		 */
		synchronize_srcu();
	}

	/* Insert the console into console_list */

	if (con->flags & CON_NBCON) {
		nbcon_lock(con);
		nbcon->registration_in_progress = false;
		nbcon_unlock(con);
	}
[...]
}

and a similar thing in unregister_console().

I am not sure if I should send this on Friday evening. But I reworked
it many times and I no longer see any obvious reason why it
could not work.

My relief is that it builds on top of your code. It basically just
adds the port_lock interface. I hope that it would actually simplify
things a lot.

Well, a huge challenge might be to replace all spin_lock(port->lock)
calls with the new API. There is a lot of code shared between various
consoles and we wanted to migrate them one-by-one.

On the other hand, the new port_lock() API should behave as simple
spin lock when port->cons is a legacy console.

Best Regards,
Petr
  

Patch

diff --git a/include/linux/console.h b/include/linux/console.h
index 15f71ccfcd9d..2c120c3f3c6e 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -17,6 +17,7 @@ 
 #include <linux/atomic.h>
 #include <linux/bits.h>
 #include <linux/rculist.h>
+#include <linux/rcuwait.h>
 #include <linux/types.h>
 
 struct vc_data;
@@ -314,7 +315,12 @@  struct cons_context_data;
  * @atomic_state:	State array for NOBKL consoles; real and handover
  * @atomic_seq:		Sequence for record tracking (32bit only)
  * @thread_pbufs:	Pointer to thread private buffer
+ * @kthread:		Pointer to kernel thread
+ * @rcuwait:		RCU wait for the kernel thread
+ * @kthread_waiting:	Indicator whether the kthread is waiting to be woken
  * @write_atomic:	Write callback for atomic context
+ * @write_thread:	Write callback for printk threaded printing
+ * @port_lock:		Callback to lock/unlock the port lock
  * @pcpu_data:		Pointer to percpu context data
  */
 struct console {
@@ -342,8 +348,13 @@  struct console {
 	atomic_t		__private atomic_seq;
 #endif
 	struct printk_buffers	*thread_pbufs;
+	struct task_struct	*kthread;
+	struct rcuwait		rcuwait;
+	atomic_t		kthread_waiting;
 
 	bool (*write_atomic)(struct console *con, struct cons_write_context *wctxt);
+	bool (*write_thread)(struct console *con, struct cons_write_context *wctxt);
+	void (*port_lock)(struct console *con, bool do_lock, unsigned long *flags);
 
 	struct cons_context_data	__percpu *pcpu_data;
 };
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index 13dd0ce23c37..8856beed65da 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -44,6 +44,8 @@  enum printk_info_flags {
 
 extern struct printk_ringbuffer *prb;
 
+extern bool have_boot_console;
+
 __printf(4, 0)
 int vprintk_store(int facility, int level,
 		  const struct dev_printk_info *dev_info,
@@ -75,6 +77,55 @@  u64 cons_read_seq(struct console *con);
 void cons_nobkl_cleanup(struct console *con);
 bool cons_nobkl_init(struct console *con);
 bool cons_alloc_percpu_data(struct console *con);
+void cons_kthread_create(struct console *con);
+
+/*
+ * Check if the given console is currently capable and allowed to print
+ * records. If the caller only works with certain types of consoles, the
+ * caller is responsible for checking the console type before calling
+ * this function.
+ */
+static inline bool console_is_usable(struct console *con, short flags)
+{
+	if (!(flags & CON_ENABLED))
+		return false;
+
+	if ((flags & CON_SUSPENDED))
+		return false;
+
+	/*
+	 * The usability of a console varies depending on whether
+	 * it is a NOBKL console or not.
+	 */
+
+	if (flags & CON_NO_BKL) {
+		if (have_boot_console)
+			return false;
+
+	} else {
+		if (!con->write)
+			return false;
+		/*
+		 * Console drivers may assume that per-cpu resources have
+		 * been allocated. So unless they're explicitly marked as
+		 * being able to cope (CON_ANYTIME) don't call them until
+		 * this CPU is officially up.
+		 */
+		if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME))
+			return false;
+	}
+
+	return true;
+}
+
+/**
+ * cons_kthread_wake - Wake up a printk thread
+ * @con:        Console to operate on
+ */
+static inline void cons_kthread_wake(struct console *con)
+{
+	rcuwait_wake_up(&con->rcuwait);
+}
 
 #else
 
@@ -82,6 +133,9 @@  bool cons_alloc_percpu_data(struct console *con);
 #define PRINTK_MESSAGE_MAX	0
 #define PRINTKRB_RECORD_MAX	0
 
+static inline void cons_kthread_wake(struct console *con) { }
+static inline void cons_kthread_create(struct console *con) { }
+
 /*
  * In !PRINTK builds we still export console_sem
  * semaphore and some of console functions (console_unlock()/etc.), so
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index eab0358baa6f..4c6abb033ec1 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2723,45 +2723,6 @@  static bool abandon_console_lock_in_panic(void)
 	return atomic_read(&panic_cpu) != raw_smp_processor_id();
 }
 
-/*
- * Check if the given console is currently capable and allowed to print
- * records. If the caller only works with certain types of consoles, the
- * caller is responsible for checking the console type before calling
- * this function.
- */
-static inline bool console_is_usable(struct console *con, short flags)
-{
-	if (!(flags & CON_ENABLED))
-		return false;
-
-	if ((flags & CON_SUSPENDED))
-		return false;
-
-	/*
-	 * The usability of a console varies depending on whether
-	 * it is a NOBKL console or not.
-	 */
-
-	if (flags & CON_NO_BKL) {
-		if (have_boot_console)
-			return false;
-
-	} else {
-		if (!con->write)
-			return false;
-		/*
-		 * Console drivers may assume that per-cpu resources have
-		 * been allocated. So unless they're explicitly marked as
-		 * being able to cope (CON_ANYTIME) don't call them until
-		 * this CPU is officially up.
-		 */
-		if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME))
-			return false;
-	}
-
-	return true;
-}
-
 static void __console_unlock(void)
 {
 	console_locked = 0;
@@ -3573,10 +3534,14 @@  EXPORT_SYMBOL(register_console);
 /* Must be called under console_list_lock(). */
 static int unregister_console_locked(struct console *console)
 {
+	struct console *c;
+	bool is_boot_con;
 	int res;
 
 	lockdep_assert_console_list_lock_held();
 
+	is_boot_con = console->flags & CON_BOOT;
+
 	con_printk(KERN_INFO, console, "disabled\n");
 
 	res = _braille_unregister_console(console);
@@ -3620,6 +3585,15 @@  static int unregister_console_locked(struct console *console)
 	if (console->exit)
 		res = console->exit(console);
 
+	/*
+	 * Each time a boot console unregisters, try to start up the printing
+	 * threads. They will only start if this was the last boot console.
+	 */
+	if (is_boot_con) {
+		for_each_console(c)
+			cons_kthread_create(c);
+	}
+
 	return res;
 }
 
diff --git a/kernel/printk/printk_nobkl.c b/kernel/printk/printk_nobkl.c
index 5c591bced1be..bc3b69223897 100644
--- a/kernel/printk/printk_nobkl.c
+++ b/kernel/printk/printk_nobkl.c
@@ -5,6 +5,8 @@ 
 #include <linux/kernel.h>
 #include <linux/console.h>
 #include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/slab.h>
 #include "printk_ringbuffer.h"
 #include "internal.h"
 /*
@@ -700,6 +702,7 @@  static bool __cons_try_acquire(struct cons_context *ctxt)
 	/* Set up the new state for takeover */
 	copy_full_state(new, old);
 	new.locked = 1;
+	new.thread = ctxt->thread;
 	new.cur_prio = ctxt->prio;
 	new.req_prio = CONS_PRIO_NONE;
 	new.cpu = cpu;
@@ -714,6 +717,14 @@  static bool __cons_try_acquire(struct cons_context *ctxt)
 		goto success;
 	}
 
+	/*
+	 * A threaded printer context will never spin or perform a
+	 * hostile takeover. The atomic writer will wake the thread
+	 * when it is done with the important output.
+	 */
+	if (ctxt->thread)
+		return false;
+
 	/*
 	 * If the active context is on the same CPU then there is
 	 * obviously no handshake possible.
@@ -871,6 +882,9 @@  static bool __cons_release(struct cons_context *ctxt)
 	return true;
 }
 
+static bool printk_threads_enabled __ro_after_init;
+static bool printk_force_atomic __initdata;
+
 /**
  * cons_release - Release the console after output is done
  * @ctxt:	The acquire context that contains the state
@@ -1141,7 +1155,7 @@  static bool cons_get_record(struct cons_write_context *wctxt)
  * When true is returned, @wctxt->ctxt.backlog indicates whether there are
  * still records pending in the ringbuffer,
  */
-static int __maybe_unused cons_emit_record(struct cons_write_context *wctxt)
+static bool cons_emit_record(struct cons_write_context *wctxt)
 {
 	struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
 	struct console *con = ctxt->console;
@@ -1176,6 +1190,8 @@  static int __maybe_unused cons_emit_record(struct cons_write_context *wctxt)
 
 	if (!ctxt->thread && con->write_atomic) {
 		done = con->write_atomic(con, wctxt);
+	} else if (ctxt->thread && con->write_thread) {
+		done = con->write_thread(con, wctxt);
 	} else {
 		cons_release(ctxt);
 		WARN_ON_ONCE(1);
@@ -1203,6 +1219,243 @@  static int __maybe_unused cons_emit_record(struct cons_write_context *wctxt)
 	return cons_seq_try_update(ctxt);
 }
 
+/**
+ * cons_kthread_should_wakeup - Check whether the printk thread should wakeup
+ * @con:	Console to operate on
+ * @ctxt:	The acquire context that contains the state
+ *		at console_acquire()
+ *
+ * Returns: True if the thread should shut down or if the console is allowed
+ * to print and a record is available. False otherwise.
+ *
+ * After the thread wakes up, it must first check if it should shut down
+ * before attempting any printing.
+ */
+static bool cons_kthread_should_wakeup(struct console *con, struct cons_context *ctxt)
+{
+	bool is_usable;
+	short flags;
+	int cookie;
+
+	if (kthread_should_stop())
+		return true;
+
+	cookie = console_srcu_read_lock();
+	flags = console_srcu_read_flags(con);
+	is_usable = console_is_usable(con, flags);
+	console_srcu_read_unlock(cookie);
+
+	if (!is_usable)
+		return false;
+
+	/* This reads state and sequence on 64-bit. On 32-bit, only state. */
+	cons_state_read(con, CON_STATE_CUR, &ctxt->state);
+
+	/*
+	 * Atomic printing is running on some other CPU. The owner
+	 * will wake the console thread on unlock if necessary.
+	 */
+	if (ctxt->state.locked)
+		return false;
+
+	/* Bring the sequence in @ctxt up to date */
+	cons_context_set_seq(ctxt);
+
+	return prb_read_valid(prb, ctxt->oldseq, NULL);
+}
+
+/**
+ * cons_kthread_func - The printk thread function
+ * @__console:	Console to operate on
+ */
+static int cons_kthread_func(void *__console)
+{
+	struct console *con = __console;
+	struct cons_write_context wctxt = {
+		.ctxt.console	= con,
+		.ctxt.prio	= CONS_PRIO_NORMAL,
+		.ctxt.thread	= 1,
+	};
+	struct cons_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
+	unsigned long flags;
+	short con_flags;
+	bool backlog;
+	int cookie;
+	int ret;
+
+	for (;;) {
+		atomic_inc(&con->kthread_waiting);
+
+		/*
+		 * Provides a full memory barrier vs. cons_kthread_wake().
+		 */
+		ret = rcuwait_wait_event(&con->rcuwait,
+					 cons_kthread_should_wakeup(con, ctxt),
+					 TASK_INTERRUPTIBLE);
+
+		atomic_dec(&con->kthread_waiting);
+
+		if (kthread_should_stop())
+			break;
+
+		/* Wait was interrupted by a spurious signal; go back to sleep. */
+		if (ret)
+			continue;
+
+		for (;;) {
+			cookie = console_srcu_read_lock();
+
+			/*
+			 * Ensure this stays on the CPU to make handover and
+			 * takeover possible.
+			 */
+			if (con->port_lock)
+				con->port_lock(con, true, &flags);
+			else
+				migrate_disable();
+
+			/*
+			 * Try to acquire the console without attempting to
+			 * take over. If an atomic printer wants to hand
+			 * back to the thread it simply wakes it up.
+			 */
+			if (!cons_try_acquire(ctxt))
+				break;
+
+			con_flags = console_srcu_read_flags(con);
+
+			if (console_is_usable(con, con_flags)) {
+				/*
+				 * If the emit fails, this context is no
+				 * longer the owner. Abort the processing and
+				 * wait for new records to print.
+				 */
+				if (!cons_emit_record(&wctxt))
+					break;
+				backlog = ctxt->backlog;
+			} else {
+				backlog = false;
+			}
+
+			/*
+			 * If the release fails, this context was not the
+			 * owner. Abort the processing and wait for new
+			 * records to print.
+			 */
+			if (!cons_release(ctxt))
+				break;
+
+			/* Backlog done? */
+			if (!backlog)
+				break;
+
+			if (con->port_lock)
+				con->port_lock(con, false, &flags);
+			else
+				migrate_enable();
+
+			console_srcu_read_unlock(cookie);
+
+			cond_resched();
+		}
+		if (con->port_lock)
+			con->port_lock(con, false, &flags);
+		else
+			migrate_enable();
+
+		console_srcu_read_unlock(cookie);
+	}
+	return 0;
+}
+
+/**
+ * cons_kthread_stop - Stop a printk thread
+ * @con:	Console to operate on
+ */
+static void cons_kthread_stop(struct console *con)
+{
+	lockdep_assert_console_list_lock_held();
+
+	if (!con->kthread)
+		return;
+
+	kthread_stop(con->kthread);
+	con->kthread = NULL;
+
+	kfree(con->thread_pbufs);
+	con->thread_pbufs = NULL;
+}
+
+/**
+ * cons_kthread_create - Create a printk thread
+ * @con:	Console to operate on
+ *
+ * If it fails, let the console proceed. The atomic part might
+ * be usable and useful.
+ */
+void cons_kthread_create(struct console *con)
+{
+	struct task_struct *kt;
+	struct console *c;
+
+	lockdep_assert_console_list_lock_held();
+
+	if (!(con->flags & CON_NO_BKL) || !con->write_thread)
+		return;
+
+	if (!printk_threads_enabled || con->kthread)
+		return;
+
+	/*
+	 * Printer threads cannot be started as long as any boot console is
+	 * registered because there is no way to synchronize the hardware
+	 * registers between boot console code and regular console code.
+	 */
+	for_each_console(c) {
+		if (c->flags & CON_BOOT)
+			return;
+	}
+	have_boot_console = false;
+
+	con->thread_pbufs = kmalloc(sizeof(*con->thread_pbufs), GFP_KERNEL);
+	if (!con->thread_pbufs) {
+		con_printk(KERN_ERR, con, "failed to allocate printing thread buffers\n");
+		return;
+	}
+
+	kt = kthread_run(cons_kthread_func, con, "pr/%s%d", con->name, con->index);
+	if (IS_ERR(kt)) {
+		con_printk(KERN_ERR, con, "failed to start printing thread\n");
+		kfree(con->thread_pbufs);
+		con->thread_pbufs = NULL;
+		return;
+	}
+
+	con->kthread = kt;
+
+	/*
+	 * It is important that console printing threads are scheduled
+	 * shortly after a printk call and with generous runtime budgets.
+	 */
+	sched_set_normal(con->kthread, -20);
+}
+
+static int __init printk_setup_threads(void)
+{
+	struct console *con;
+
+	if (printk_force_atomic)
+		return 0;
+
+	console_list_lock();
+	printk_threads_enabled = true;
+	for_each_console(con)
+		cons_kthread_create(con);
+	console_list_unlock();
+	return 0;
+}
+early_initcall(printk_setup_threads);
+
 /**
  * cons_nobkl_init - Initialize the NOBKL console specific data
  * @con:	Console to initialize
@@ -1216,9 +1469,12 @@  bool cons_nobkl_init(struct console *con)
 	if (!cons_alloc_percpu_data(con))
 		return false;
 
+	rcuwait_init(&con->rcuwait);
+	atomic_set(&con->kthread_waiting, 0);
 	cons_state_set(con, CON_STATE_CUR, &state);
 	cons_state_set(con, CON_STATE_REQ, &state);
 	cons_seq_init(con);
+	cons_kthread_create(con);
 	return true;
 }
 
@@ -1230,6 +1486,7 @@  void cons_nobkl_cleanup(struct console *con)
 {
 	struct cons_state state = { };
 
+	cons_kthread_stop(con);
 	cons_state_set(con, CON_STATE_CUR, &state);
 	cons_state_set(con, CON_STATE_REQ, &state);
 	cons_free_percpu_data(con);