[1/2] printk: nbcon: move locked_port flag to struct uart_port

Message ID 20240123054033.183114-2-junxiao.chang@intel.com
State New
Headers
Series nbcon locking issue with v6.6.10-rt18 kernel |

Commit Message

Chang, Junxiao Jan. 23, 2024, 5:40 a.m. UTC
  The console pointer in uart_port might be shared among multiple uart
ports. The flag indicating that a port is locked by nbcon should therefore
be saved in the uart_port structure instead of in the console structure.

Fixes: 6424f396c49e ("printk: nbcon: Implement processing in port->lock wrapper")
Suggested-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Junxiao Chang <junxiao.chang@intel.com>
---
 include/linux/console.h     | 2 --
 include/linux/serial_core.h | 1 +
 kernel/printk/nbcon.c       | 8 ++++----
 3 files changed, 5 insertions(+), 6 deletions(-)
  

Comments

John Ogness Jan. 24, 2024, 9:47 a.m. UTC | #1
On 2024-01-23, Junxiao Chang <junxiao.chang@intel.com> wrote:
> Console pointer in uart_port might be shared among multiple uart
> ports.

I still want to investigate why the pointer is shared. This sounds
sloppy or dangerous.

> Flag port locked by nbcon should be saved in uart_port
> structure instead of in console structure.

If it turns out that the pointer sharing is necessary, this patch will
fix the reported problem.

Reviewed-by: John Ogness <john.ogness@linutronix.de>
  
Sebastian Andrzej Siewior Jan. 24, 2024, 10:05 a.m. UTC | #2
On 2024-01-24 10:53:10 [+0106], John Ogness wrote:
> On 2024-01-23, Junxiao Chang <junxiao.chang@intel.com> wrote:
> > Console pointer in uart_port might be shared among multiple uart
> > ports.
> 
> I still want to investigate why the pointer is shared. This sounds
> sloppy or dangerous.

I have an x86 server box and PNP enumerates two UARTs (8250). Only one is
wired up but both can be specified as console=.
What do I need to do to reproduce this here? Using console= twice does
not do the trick.

Sebastian
  
Chang, Junxiao Jan. 25, 2024, 1:08 a.m. UTC | #3
> > > Console pointer in uart_port might be shared among multiple uart 
> > > ports.
> > 
> > I still want to investigate why the pointer is shared. This sounds 
> > sloppy or dangerous.

> I have x86 a server box and PNP enumerates two UARTs (8250). Only one is wired up but both can be specified as console=.
> What do I need to do to reproduce this here? Using console= twice does not do the trick.

Issue could be reproduced with our hardware every time. My cmdline is: BOOT_IMAGE=(hd0,gpt2)/boot/bzImage-linux-intel-iot-lts-rt-6.6-kernel root=PARTLABEL=primary rootwait console=ttyS0,115200 console=tty0 init=/sbin/preinit-env console=ttyS4,115200n8 console=ttyS5,115200n8

If you would like to try any debug patch with my ADL hardware, please feel free to let me know.

For console pointer sharing issue, from code logic point of view, the call chain looks like:
serial8250_register_8250_port -> uart_add_one_port -> serial_ctrl_register_port -> serial_core_register_port -> serial_core_add_one_port

In serial_core_add_one_port(), the uart_port's console pointer is assigned the driver's console pointer:
	uport->cons = drv->cons;
Driver's console pointer points to static structure "univ8250_console" which is defined in 8250_core.c

That is, the console pointers of all 8250 serial devices are the same: they all point to univ8250_console.

Below is debug log output:
sh-5.1# dmesg | grep @@@@
[    1.687121] @@@@ univ8250_console_init univ8250_console address:ffffffff935df1e0
[    3.419880] @@@@ serial8250_register_8250_port: name:ttyS4, cons pointer:ffffffff935df1e0
[    3.534954] @@@@ serial8250_register_8250_port: name:ttyS5, cons pointer:ffffffff935df1e0
[   11.971345] @@@@ serial8250_do_shutdown, curr thread:(agetty), name:ttyS0, line 0, cons pointer:ffffffff935df1e0
[   11.971506] @@@@ serial8250_do_shutdown, curr thread:(agetty), name:ttyS4, line 4, cons pointer:ffffffff935df1e0
[   11.971849] @@@@ serial8250_do_shutdown, curr thread:(agetty), name:ttyS4, line 4, cons pointer:ffffffff935df1e0
[   11.983072] @@@@ serial8250_do_shutdown, curr thread:agetty, name:ttyS4, line 4, cons pointer:ffffffff935df1e0
[   11.983595] @@@@ serial8250_do_shutdown, curr thread:(agetty), name:ttyS5, line 5, cons pointer:ffffffff935df1e0
[   11.983895] @@@@ serial8250_do_shutdown, curr thread:(agetty), name:ttyS5, line 5, cons pointer:ffffffff935df1e0
[   12.009632] @@@@ serial8250_do_shutdown, curr thread:agetty, name:ttyS5, line 5, cons pointer:ffffffff935df1e0
sh-5.1#

All cons pointer addresses are the same: "ffffffff935df1e0".

Debug patch:
diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index 30434718fad80..740fd7e133a28 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -755,6 +755,7 @@ static int __init univ8250_console_init(void)

        serial8250_isa_init_ports();
        register_console(&univ8250_console);
+       printk("@@@@ %s univ8250_console address:%lx\n", __func__, (unsigned long)(&univ8250_console));
        return 0;
 }
 console_initcall(univ8250_console_init);
@@ -1181,6 +1182,7 @@ int serial8250_register_8250_port(const struct uart_8250_port *up)
                        if (ret)
                                goto err;

+                       printk("@@@@ %s: name:%s, cons pointer:%lx\n", __func__, uart->port.name, (unsigned long)uart->port.cons);
                        ret = uart->port.line;
                } else {
                        dev_info(uart->port.dev,
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 437a7d3d34cde..93f1e548d1301 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -2510,6 +2510,7 @@ void serial8250_do_shutdown(struct uart_port *port)
        struct uart_8250_port *up = up_to_u8250p(port);
        unsigned long flags;

+       printk("@@@@ %s, curr thread:%s, name:%s, line %d, cons pointer:%lx\n", __func__, current->comm, port->name, port->line, (unsigned long)port->cons);
        serial8250_rpm_get(up);
        /*
         * Disable interrupts from this port

Thanks,
Junxiao
  
Sebastian Andrzej Siewior Jan. 25, 2024, 1:35 p.m. UTC | #4
On 2024-01-25 01:08:24 [+0000], Chang, Junxiao wrote:
> 
> Issue could be reproduced with our hardware every time. My cmdline is:
> BOOT_IMAGE=(hd0,gpt2)/boot/bzImage-linux-intel-iot-lts-rt-6.6-kernel
> root=PARTLABEL=primary rootwait console=ttyS0,115200 console=tty0
> init=/sbin/preinit-env console=ttyS4,115200n8 console=ttyS5,115200n8
> 
> If you would like to try any debug patch with my ADL hardware, please feel free to let me know.
> 
> For console pointer sharing issue, from code logic point of view, the call chain looks like:
> serial8250_register_8250_port -> uart_add_one_port -> serial_ctrl_register_port -> serial_core_register_port -> serial_core_add_one_port
> 
> In API serial_core_add_one_port, uart_port's console pointer is assigned with driver's console pointer:
> 	uport->cons = drv->cons;
> Driver's console pointer points to static structure "univ8250_console" which is defined in 8250_core.c
> 
> That is, all 8250 serial devices' console pointer are same, they point to univ8250_console.

Okay, So that I see this and the unbalanced acquire/ release part with
the attached patch. I leave it to John…
Btw. You don't see kernel log output on ttyS4 + ttyS5, right? Just a
login prompt.
…
> Thanks,
> Junxiao

Sebastian
  
Chang, Junxiao Jan. 25, 2024, 11:20 p.m. UTC | #5
> >  That is, all 8250 serial devices' console pointer are same, they point to univ8250_console.

> Okay, So that I see this and the unbalanced acquire/ release part with the attached patch. I leave it to John… Btw. You don't see kernel log output on ttyS4 + ttyS5, right? Just a login prompt.
>
> Sebastian

Right. At that time, only ttyS0 is console and nbcon.

Thanks,
Junxiao
  
John Ogness Jan. 26, 2024, 7:58 a.m. UTC | #6
Hi Sebastian,

On 2024-01-25, Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> Okay, So that I see this and the unbalanced acquire/ release part with
> the attached patch. I leave it to John...

Please add this one patch to the 6.6-rt and later queues. The 2nd patch
in this series is not needed.

For 6.8 the fix may end up looking different, i.e. by eliminating the
struct console pointer sharing instead. But for now this patch is fully
sufficient.

Thanks.

John
  
Sebastian Andrzej Siewior Jan. 26, 2024, 4:39 p.m. UTC | #7
On 2024-01-26 09:04:34 [+0106], John Ogness wrote:
> Hi Sebastian,
Hi,

> Please add this one patch to the 6.6-rt and later queues. The 2nd patch
> in this series is not needed.
Okay. I just dropped a v6.8-RT with this included and I am going to poke
Clark regarding v6.6 next week.

> Thanks.
> 
> John

Sebastian
  

Patch

diff --git a/include/linux/console.h b/include/linux/console.h
index f8a0628678886..1eb9580e9b18a 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -304,7 +304,6 @@  struct nbcon_write_context {
  * @nbcon_state:	State for nbcon consoles
  * @nbcon_seq:		Sequence number of the next record for nbcon to print
  * @pbufs:		Pointer to nbcon private buffer
- * @locked_port:	True, if the port lock is locked by nbcon
  * @kthread:		Printer kthread for this console
  * @rcuwait:		RCU-safe wait object for @kthread waking
  * @irq_work:		Defer @kthread waking to IRQ work context
@@ -338,7 +337,6 @@  struct console {
 	atomic_t		__private nbcon_state;
 	atomic_long_t		__private nbcon_seq;
 	struct printk_buffers	*pbufs;
-	bool			locked_port;
 	struct task_struct	*kthread;
 	struct rcuwait		rcuwait;
 	struct irq_work		irq_work;
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 245c11753effd..b2221a50fcb29 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -488,6 +488,7 @@  struct uart_port {
 	struct uart_icount	icount;			/* statistics */
 
 	struct console		*cons;			/* struct console, if any */
+	bool			nbcon_locked_port;	/* True, if the port is locked by nbcon */
 	/* flags must be updated while holding port mutex */
 	upf_t			flags;
 
diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c
index 1b1b585b1675b..b53d93585ee71 100644
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -1586,7 +1586,7 @@  void nbcon_acquire(struct uart_port *up)
 	if (!uart_is_nbcon(up))
 		return;
 
-	WARN_ON_ONCE(con->locked_port);
+	WARN_ON_ONCE(up->nbcon_locked_port);
 
 	do {
 		do {
@@ -1597,7 +1597,7 @@  void nbcon_acquire(struct uart_port *up)
 
 	} while (!nbcon_context_enter_unsafe(&ctxt));
 
-	con->locked_port = true;
+	up->nbcon_locked_port = true;
 }
 EXPORT_SYMBOL_GPL(nbcon_acquire);
 
@@ -1623,13 +1623,13 @@  void nbcon_release(struct uart_port *up)
 		.prio		= NBCON_PRIO_NORMAL,
 	};
 
-	if (!con->locked_port)
+	if (!up->nbcon_locked_port)
 		return;
 
 	if (nbcon_context_exit_unsafe(&ctxt))
 		nbcon_context_release(&ctxt);
 
-	con->locked_port = false;
+	up->nbcon_locked_port = false;
 }
 EXPORT_SYMBOL_GPL(nbcon_release);