[2/2] riscv: stacktrace: Make walk_stackframe cross pt_regs frame

Message ID 20221109064937.3643993-3-guoren@kernel.org
State New
Headers
Series riscv: stacktrace: A fixup and an optimization |

Commit Message

Guo Ren Nov. 9, 2022, 6:49 a.m. UTC
  From: Guo Ren <guoren@linux.alibaba.com>

The current walk_stackframe with FRAME_POINTER would stop unwinding at
ret_from_exception:
  BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1518
  in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 1, name: init
  CPU: 0 PID: 1 Comm: init Not tainted 5.10.113-00021-g15c15974895c-dirty #192
  Call Trace:
  [<ffffffe0002038c8>] walk_stackframe+0x0/0xee
  [<ffffffe000aecf48>] show_stack+0x32/0x4a
  [<ffffffe000af1618>] dump_stack_lvl+0x72/0x8e
  [<ffffffe000af1648>] dump_stack+0x14/0x1c
  [<ffffffe000239ad2>] ___might_sleep+0x12e/0x138
  [<ffffffe000239aec>] __might_sleep+0x10/0x18
  [<ffffffe000afe3fe>] down_read+0x22/0xa4
  [<ffffffe000207588>] do_page_fault+0xb0/0x2fe
  [<ffffffe000201b80>] ret_from_exception+0x0/0xc

The optimization lets walk_stackframe cross the pt_regs frame and
produce a more complete backtrace for debugging:
  BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1518
  in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 1, name: init
  CPU: 0 PID: 1 Comm: init Not tainted 5.10.113-00021-g15c15974895c-dirty #192
  Call Trace:
  [<ffffffe0002038c8>] walk_stackframe+0x0/0xee
  [<ffffffe000aecf48>] show_stack+0x32/0x4a
  [<ffffffe000af1618>] dump_stack_lvl+0x72/0x8e
  [<ffffffe000af1648>] dump_stack+0x14/0x1c
  [<ffffffe000239ad2>] ___might_sleep+0x12e/0x138
  [<ffffffe000239aec>] __might_sleep+0x10/0x18
  [<ffffffe000afe3fe>] down_read+0x22/0xa4
  [<ffffffe000207588>] do_page_fault+0xb0/0x2fe
  [<ffffffe000201b80>] ret_from_exception+0x0/0xc
  [<ffffffe000613c06>] riscv_intc_irq+0x1a/0x72
  [<ffffffe000201b80>] ret_from_exception+0x0/0xc
  [<ffffffe00033f44a>] vma_link+0x54/0x160
  [<ffffffe000341d7a>] mmap_region+0x2cc/0x4d0
  [<ffffffe000342256>] do_mmap+0x2d8/0x3ac
  [<ffffffe000326318>] vm_mmap_pgoff+0x70/0xb8
  [<ffffffe00032638a>] vm_mmap+0x2a/0x36
  [<ffffffe0003cfdde>] elf_map+0x72/0x84
  [<ffffffe0003d05f8>] load_elf_binary+0x69a/0xec8
  [<ffffffe000376240>] bprm_execve+0x246/0x53a
  [<ffffffe00037786c>] kernel_execve+0xe8/0x124
  [<ffffffe000aecdf2>] run_init_process+0xfa/0x10c
  [<ffffffe000aece16>] try_to_run_init_process+0x12/0x3c
  [<ffffffe000afa920>] kernel_init+0xb4/0xf8
  [<ffffffe000201b80>] ret_from_exception+0x0/0xc

Here is the error injection test code for the above output:
 drivers/irqchip/irq-riscv-intc.c:
 static asmlinkage void riscv_intc_irq(struct pt_regs *regs)
 {
        unsigned long cause = regs->cause & ~CAUSE_IRQ_FLAG;
+       u32 tmp; __get_user(tmp, (u32 *)0);

Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
Cc: Palmer Dabbelt <palmer@rivosinc.com>
Cc: Changbin Du <changbin.du@intel.com>
---
 arch/riscv/kernel/entry.S      | 2 +-
 arch/riscv/kernel/stacktrace.c | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)
  

Comments

Guo Ren Nov. 21, 2022, 9:53 a.m. UTC | #1
On Wed, Nov 9, 2022 at 2:50 PM <guoren@kernel.org> wrote:
>
> From: Guo Ren <guoren@linux.alibaba.com>
>
> The current walk_stackframe with FRAME_POINTER would stop unwinding at
> ret_from_exception:
>   BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1518
>   in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 1, name: init
>   CPU: 0 PID: 1 Comm: init Not tainted 5.10.113-00021-g15c15974895c-dirty #192
>   Call Trace:
>   [<ffffffe0002038c8>] walk_stackframe+0x0/0xee
>   [<ffffffe000aecf48>] show_stack+0x32/0x4a
>   [<ffffffe000af1618>] dump_stack_lvl+0x72/0x8e
>   [<ffffffe000af1648>] dump_stack+0x14/0x1c
>   [<ffffffe000239ad2>] ___might_sleep+0x12e/0x138
>   [<ffffffe000239aec>] __might_sleep+0x10/0x18
>   [<ffffffe000afe3fe>] down_read+0x22/0xa4
>   [<ffffffe000207588>] do_page_fault+0xb0/0x2fe
>   [<ffffffe000201b80>] ret_from_exception+0x0/0xc
>
> The optimization would help walk_stackframe cross the pt_regs frame and
> get more backtrace of debug info:
>   BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1518
>   in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 1, name: init
>   CPU: 0 PID: 1 Comm: init Not tainted 5.10.113-00021-g15c15974895c-dirty #192
>   Call Trace:
>   [<ffffffe0002038c8>] walk_stackframe+0x0/0xee
>   [<ffffffe000aecf48>] show_stack+0x32/0x4a
>   [<ffffffe000af1618>] dump_stack_lvl+0x72/0x8e
>   [<ffffffe000af1648>] dump_stack+0x14/0x1c
>   [<ffffffe000239ad2>] ___might_sleep+0x12e/0x138
>   [<ffffffe000239aec>] __might_sleep+0x10/0x18
>   [<ffffffe000afe3fe>] down_read+0x22/0xa4
>   [<ffffffe000207588>] do_page_fault+0xb0/0x2fe
>   [<ffffffe000201b80>] ret_from_exception+0x0/0xc
>   [<ffffffe000613c06>] riscv_intc_irq+0x1a/0x72
>   [<ffffffe000201b80>] ret_from_exception+0x0/0xc
>   [<ffffffe00033f44a>] vma_link+0x54/0x160
>   [<ffffffe000341d7a>] mmap_region+0x2cc/0x4d0
>   [<ffffffe000342256>] do_mmap+0x2d8/0x3ac
>   [<ffffffe000326318>] vm_mmap_pgoff+0x70/0xb8
>   [<ffffffe00032638a>] vm_mmap+0x2a/0x36
>   [<ffffffe0003cfdde>] elf_map+0x72/0x84
>   [<ffffffe0003d05f8>] load_elf_binary+0x69a/0xec8
>   [<ffffffe000376240>] bprm_execve+0x246/0x53a
>   [<ffffffe00037786c>] kernel_execve+0xe8/0x124
>   [<ffffffe000aecdf2>] run_init_process+0xfa/0x10c
>   [<ffffffe000aece16>] try_to_run_init_process+0x12/0x3c
>   [<ffffffe000afa920>] kernel_init+0xb4/0xf8
>   [<ffffffe000201b80>] ret_from_exception+0x0/0xc
>
> Here is the error injection test code for the above output:
>  drivers/irqchip/irq-riscv-intc.c:
>  static asmlinkage void riscv_intc_irq(struct pt_regs *regs)
>  {
>         unsigned long cause = regs->cause & ~CAUSE_IRQ_FLAG;
> +       u32 tmp; __get_user(tmp, (u32 *)0);
>
> Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> Signed-off-by: Guo Ren <guoren@kernel.org>
> Cc: Palmer Dabbelt <palmer@rivosinc.com>
> Cc: Changbin Du <changbin.du@intel.com>
> ---
>  arch/riscv/kernel/entry.S      | 2 +-
>  arch/riscv/kernel/stacktrace.c | 9 +++++++++
>  2 files changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> index b9eda3fcbd6d..329cf51fcd4d 100644
> --- a/arch/riscv/kernel/entry.S
> +++ b/arch/riscv/kernel/entry.S
> @@ -248,7 +248,7 @@ ret_from_syscall_rejected:
>         andi t0, t0, _TIF_SYSCALL_WORK
>         bnez t0, handle_syscall_trace_exit
>
> -ret_from_exception:
> +ENTRY(ret_from_exception)
>         REG_L s0, PT_STATUS(sp)
>         csrc CSR_STATUS, SR_IE
>  #ifdef CONFIG_TRACE_IRQFLAGS
> diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
> index bcfe9eb55f80..75c8dd64fc48 100644
> --- a/arch/riscv/kernel/stacktrace.c
> +++ b/arch/riscv/kernel/stacktrace.c
> @@ -16,6 +16,8 @@
>
>  #ifdef CONFIG_FRAME_POINTER
>
> +extern asmlinkage void ret_from_exception(void);
> +
>  void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
>                              bool (*fn)(void *, unsigned long), void *arg)
>  {
> @@ -59,6 +61,13 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
>                         fp = frame->fp;
>                         pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
>                                                    &frame->ra);
> +                       if (pc == (unsigned long) ret_from_exception) {
I forgot ret_from_syscall because I tested it on the generic_entry
series base. I would merge this patch into the generic_entry series as
an optimization.

> +                               if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc)))
> +                                       break;
> +
> +                               pc = ((struct pt_regs *)sp)->epc;
> +                               fp = ((struct pt_regs *)sp)->s0;
> +                       }
>                 }
>
>         }
> --
> 2.36.1
>
  
Guo Ren Nov. 21, 2022, 11:49 a.m. UTC | #2
On Mon, Nov 21, 2022 at 5:53 PM Guo Ren <guoren@kernel.org> wrote:
>
> On Wed, Nov 9, 2022 at 2:50 PM <guoren@kernel.org> wrote:
> >
> > From: Guo Ren <guoren@linux.alibaba.com>
> >
> > The current walk_stackframe with FRAME_POINTER would stop unwinding at
> > ret_from_exception:
> >   BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1518
> >   in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 1, name: init
> >   CPU: 0 PID: 1 Comm: init Not tainted 5.10.113-00021-g15c15974895c-dirty #192
> >   Call Trace:
> >   [<ffffffe0002038c8>] walk_stackframe+0x0/0xee
> >   [<ffffffe000aecf48>] show_stack+0x32/0x4a
> >   [<ffffffe000af1618>] dump_stack_lvl+0x72/0x8e
> >   [<ffffffe000af1648>] dump_stack+0x14/0x1c
> >   [<ffffffe000239ad2>] ___might_sleep+0x12e/0x138
> >   [<ffffffe000239aec>] __might_sleep+0x10/0x18
> >   [<ffffffe000afe3fe>] down_read+0x22/0xa4
> >   [<ffffffe000207588>] do_page_fault+0xb0/0x2fe
> >   [<ffffffe000201b80>] ret_from_exception+0x0/0xc
> >
> > The optimization would help walk_stackframe cross the pt_regs frame and
> > get more backtrace of debug info:
> >   BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1518
> >   in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 1, name: init
> >   CPU: 0 PID: 1 Comm: init Not tainted 5.10.113-00021-g15c15974895c-dirty #192
> >   Call Trace:
> >   [<ffffffe0002038c8>] walk_stackframe+0x0/0xee
> >   [<ffffffe000aecf48>] show_stack+0x32/0x4a
> >   [<ffffffe000af1618>] dump_stack_lvl+0x72/0x8e
> >   [<ffffffe000af1648>] dump_stack+0x14/0x1c
> >   [<ffffffe000239ad2>] ___might_sleep+0x12e/0x138
> >   [<ffffffe000239aec>] __might_sleep+0x10/0x18
> >   [<ffffffe000afe3fe>] down_read+0x22/0xa4
> >   [<ffffffe000207588>] do_page_fault+0xb0/0x2fe
> >   [<ffffffe000201b80>] ret_from_exception+0x0/0xc
> >   [<ffffffe000613c06>] riscv_intc_irq+0x1a/0x72
> >   [<ffffffe000201b80>] ret_from_exception+0x0/0xc
> >   [<ffffffe00033f44a>] vma_link+0x54/0x160
> >   [<ffffffe000341d7a>] mmap_region+0x2cc/0x4d0
> >   [<ffffffe000342256>] do_mmap+0x2d8/0x3ac
> >   [<ffffffe000326318>] vm_mmap_pgoff+0x70/0xb8
> >   [<ffffffe00032638a>] vm_mmap+0x2a/0x36
> >   [<ffffffe0003cfdde>] elf_map+0x72/0x84
> >   [<ffffffe0003d05f8>] load_elf_binary+0x69a/0xec8
> >   [<ffffffe000376240>] bprm_execve+0x246/0x53a
> >   [<ffffffe00037786c>] kernel_execve+0xe8/0x124
> >   [<ffffffe000aecdf2>] run_init_process+0xfa/0x10c
> >   [<ffffffe000aece16>] try_to_run_init_process+0x12/0x3c
> >   [<ffffffe000afa920>] kernel_init+0xb4/0xf8
> >   [<ffffffe000201b80>] ret_from_exception+0x0/0xc
> >
> > Here is the error injection test code for the above output:
> >  drivers/irqchip/irq-riscv-intc.c:
> >  static asmlinkage void riscv_intc_irq(struct pt_regs *regs)
> >  {
> >         unsigned long cause = regs->cause & ~CAUSE_IRQ_FLAG;
> > +       u32 tmp; __get_user(tmp, (u32 *)0);
> >
> > Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> > Signed-off-by: Guo Ren <guoren@kernel.org>
> > Cc: Palmer Dabbelt <palmer@rivosinc.com>
> > Cc: Changbin Du <changbin.du@intel.com>
> > ---
> >  arch/riscv/kernel/entry.S      | 2 +-
> >  arch/riscv/kernel/stacktrace.c | 9 +++++++++
> >  2 files changed, 10 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> > index b9eda3fcbd6d..329cf51fcd4d 100644
> > --- a/arch/riscv/kernel/entry.S
> > +++ b/arch/riscv/kernel/entry.S
> > @@ -248,7 +248,7 @@ ret_from_syscall_rejected:
> >         andi t0, t0, _TIF_SYSCALL_WORK
> >         bnez t0, handle_syscall_trace_exit
> >
> > -ret_from_exception:
> > +ENTRY(ret_from_exception)
> >         REG_L s0, PT_STATUS(sp)
> >         csrc CSR_STATUS, SR_IE
> >  #ifdef CONFIG_TRACE_IRQFLAGS
> > diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
> > index bcfe9eb55f80..75c8dd64fc48 100644
> > --- a/arch/riscv/kernel/stacktrace.c
> > +++ b/arch/riscv/kernel/stacktrace.c
> > @@ -16,6 +16,8 @@
> >
> >  #ifdef CONFIG_FRAME_POINTER
> >
> > +extern asmlinkage void ret_from_exception(void);
> > +
> >  void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
> >                              bool (*fn)(void *, unsigned long), void *arg)
> >  {
> > @@ -59,6 +61,13 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
> >                         fp = frame->fp;
> >                         pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
> >                                                    &frame->ra);
> > +                       if (pc == (unsigned long) ret_from_exception) {
> I forgot ret_from_syscall because I tested it on the generic_entry
> series base. I would merge this patch into the generic_entry series as
> an optimization.
No, the patch is correct. The ret_from_syscall is unnecessary because
it's from user space.

>
> > +                               if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc)))
> > +                                       break;
> > +
> > +                               pc = ((struct pt_regs *)sp)->epc;
> > +                               fp = ((struct pt_regs *)sp)->s0;
> > +                       }
> >                 }
> >
> >         }
> > --
> > 2.36.1
> >
>
>
> --
> Best Regards
>  Guo Ren
  
Palmer Dabbelt Dec. 6, 2022, 2:59 a.m. UTC | #3
On Tue, 08 Nov 2022 22:49:37 PST (-0800), guoren@kernel.org wrote:
> From: Guo Ren <guoren@linux.alibaba.com>
>
> The current walk_stackframe with FRAME_POINTER would stop unwinding at
> ret_from_exception:
>   BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1518
>   in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 1, name: init
>   CPU: 0 PID: 1 Comm: init Not tainted 5.10.113-00021-g15c15974895c-dirty #192
>   Call Trace:
>   [<ffffffe0002038c8>] walk_stackframe+0x0/0xee
>   [<ffffffe000aecf48>] show_stack+0x32/0x4a
>   [<ffffffe000af1618>] dump_stack_lvl+0x72/0x8e
>   [<ffffffe000af1648>] dump_stack+0x14/0x1c
>   [<ffffffe000239ad2>] ___might_sleep+0x12e/0x138
>   [<ffffffe000239aec>] __might_sleep+0x10/0x18
>   [<ffffffe000afe3fe>] down_read+0x22/0xa4
>   [<ffffffe000207588>] do_page_fault+0xb0/0x2fe
>   [<ffffffe000201b80>] ret_from_exception+0x0/0xc
>
> The optimization would help walk_stackframe cross the pt_regs frame and
> get more backtrace of debug info:
>   BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1518
>   in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 1, name: init
>   CPU: 0 PID: 1 Comm: init Not tainted 5.10.113-00021-g15c15974895c-dirty #192
>   Call Trace:
>   [<ffffffe0002038c8>] walk_stackframe+0x0/0xee
>   [<ffffffe000aecf48>] show_stack+0x32/0x4a
>   [<ffffffe000af1618>] dump_stack_lvl+0x72/0x8e
>   [<ffffffe000af1648>] dump_stack+0x14/0x1c
>   [<ffffffe000239ad2>] ___might_sleep+0x12e/0x138
>   [<ffffffe000239aec>] __might_sleep+0x10/0x18
>   [<ffffffe000afe3fe>] down_read+0x22/0xa4
>   [<ffffffe000207588>] do_page_fault+0xb0/0x2fe
>   [<ffffffe000201b80>] ret_from_exception+0x0/0xc
>   [<ffffffe000613c06>] riscv_intc_irq+0x1a/0x72
>   [<ffffffe000201b80>] ret_from_exception+0x0/0xc
>   [<ffffffe00033f44a>] vma_link+0x54/0x160
>   [<ffffffe000341d7a>] mmap_region+0x2cc/0x4d0
>   [<ffffffe000342256>] do_mmap+0x2d8/0x3ac
>   [<ffffffe000326318>] vm_mmap_pgoff+0x70/0xb8
>   [<ffffffe00032638a>] vm_mmap+0x2a/0x36
>   [<ffffffe0003cfdde>] elf_map+0x72/0x84
>   [<ffffffe0003d05f8>] load_elf_binary+0x69a/0xec8
>   [<ffffffe000376240>] bprm_execve+0x246/0x53a
>   [<ffffffe00037786c>] kernel_execve+0xe8/0x124
>   [<ffffffe000aecdf2>] run_init_process+0xfa/0x10c
>   [<ffffffe000aece16>] try_to_run_init_process+0x12/0x3c
>   [<ffffffe000afa920>] kernel_init+0xb4/0xf8
>   [<ffffffe000201b80>] ret_from_exception+0x0/0xc
>
> Here is the error injection test code for the above output:
>  drivers/irqchip/irq-riscv-intc.c:
>  static asmlinkage void riscv_intc_irq(struct pt_regs *regs)
>  {
>         unsigned long cause = regs->cause & ~CAUSE_IRQ_FLAG;
> +       u32 tmp; __get_user(tmp, (u32 *)0);
>
> Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> Signed-off-by: Guo Ren <guoren@kernel.org>
> Cc: Palmer Dabbelt <palmer@rivosinc.com>
> Cc: Changbin Du <changbin.du@intel.com>
> ---
>  arch/riscv/kernel/entry.S      | 2 +-
>  arch/riscv/kernel/stacktrace.c | 9 +++++++++
>  2 files changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> index b9eda3fcbd6d..329cf51fcd4d 100644
> --- a/arch/riscv/kernel/entry.S
> +++ b/arch/riscv/kernel/entry.S
> @@ -248,7 +248,7 @@ ret_from_syscall_rejected:
>  	andi t0, t0, _TIF_SYSCALL_WORK
>  	bnez t0, handle_syscall_trace_exit
>
> -ret_from_exception:
> +ENTRY(ret_from_exception)

This at least needs an END(), but it should also be converted over to 
some non-function entry flavor.  I converted it over to 
SYM_CODE_START_NOALIGN(), with the corresponding SYM_CODE_END(), and put 
it on for-next.

>  	REG_L s0, PT_STATUS(sp)
>  	csrc CSR_STATUS, SR_IE
>  #ifdef CONFIG_TRACE_IRQFLAGS
> diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
> index bcfe9eb55f80..75c8dd64fc48 100644
> --- a/arch/riscv/kernel/stacktrace.c
> +++ b/arch/riscv/kernel/stacktrace.c
> @@ -16,6 +16,8 @@
>
>  #ifdef CONFIG_FRAME_POINTER
>
> +extern asmlinkage void ret_from_exception(void);
> +
>  void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
>  			     bool (*fn)(void *, unsigned long), void *arg)
>  {
> @@ -59,6 +61,13 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
>  			fp = frame->fp;
>  			pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
>  						   &frame->ra);
> +			if (pc == (unsigned long)ret_from_exception) {
> +				if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc)))
> +					break;
> +
> +				pc = ((struct pt_regs *)sp)->epc;
> +				fp = ((struct pt_regs *)sp)->s0;
> +			}
>  		}
>
>  	}
  
Guo Ren Dec. 6, 2022, 3:30 a.m. UTC | #4
On Tue, Dec 6, 2022 at 10:59 AM Palmer Dabbelt <palmer@dabbelt.com> wrote:
>
> On Tue, 08 Nov 2022 22:49:37 PST (-0800), guoren@kernel.org wrote:
> > From: Guo Ren <guoren@linux.alibaba.com>
> >
> > The current walk_stackframe with FRAME_POINTER would stop unwinding at
> > ret_from_exception:
> >   BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1518
> >   in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 1, name: init
> >   CPU: 0 PID: 1 Comm: init Not tainted 5.10.113-00021-g15c15974895c-dirty #192
> >   Call Trace:
> >   [<ffffffe0002038c8>] walk_stackframe+0x0/0xee
> >   [<ffffffe000aecf48>] show_stack+0x32/0x4a
> >   [<ffffffe000af1618>] dump_stack_lvl+0x72/0x8e
> >   [<ffffffe000af1648>] dump_stack+0x14/0x1c
> >   [<ffffffe000239ad2>] ___might_sleep+0x12e/0x138
> >   [<ffffffe000239aec>] __might_sleep+0x10/0x18
> >   [<ffffffe000afe3fe>] down_read+0x22/0xa4
> >   [<ffffffe000207588>] do_page_fault+0xb0/0x2fe
> >   [<ffffffe000201b80>] ret_from_exception+0x0/0xc
> >
> > The optimization would help walk_stackframe cross the pt_regs frame and
> > get more backtrace of debug info:
> >   BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1518
> >   in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 1, name: init
> >   CPU: 0 PID: 1 Comm: init Not tainted 5.10.113-00021-g15c15974895c-dirty #192
> >   Call Trace:
> >   [<ffffffe0002038c8>] walk_stackframe+0x0/0xee
> >   [<ffffffe000aecf48>] show_stack+0x32/0x4a
> >   [<ffffffe000af1618>] dump_stack_lvl+0x72/0x8e
> >   [<ffffffe000af1648>] dump_stack+0x14/0x1c
> >   [<ffffffe000239ad2>] ___might_sleep+0x12e/0x138
> >   [<ffffffe000239aec>] __might_sleep+0x10/0x18
> >   [<ffffffe000afe3fe>] down_read+0x22/0xa4
> >   [<ffffffe000207588>] do_page_fault+0xb0/0x2fe
> >   [<ffffffe000201b80>] ret_from_exception+0x0/0xc
> >   [<ffffffe000613c06>] riscv_intc_irq+0x1a/0x72
> >   [<ffffffe000201b80>] ret_from_exception+0x0/0xc
> >   [<ffffffe00033f44a>] vma_link+0x54/0x160
> >   [<ffffffe000341d7a>] mmap_region+0x2cc/0x4d0
> >   [<ffffffe000342256>] do_mmap+0x2d8/0x3ac
> >   [<ffffffe000326318>] vm_mmap_pgoff+0x70/0xb8
> >   [<ffffffe00032638a>] vm_mmap+0x2a/0x36
> >   [<ffffffe0003cfdde>] elf_map+0x72/0x84
> >   [<ffffffe0003d05f8>] load_elf_binary+0x69a/0xec8
> >   [<ffffffe000376240>] bprm_execve+0x246/0x53a
> >   [<ffffffe00037786c>] kernel_execve+0xe8/0x124
> >   [<ffffffe000aecdf2>] run_init_process+0xfa/0x10c
> >   [<ffffffe000aece16>] try_to_run_init_process+0x12/0x3c
> >   [<ffffffe000afa920>] kernel_init+0xb4/0xf8
> >   [<ffffffe000201b80>] ret_from_exception+0x0/0xc
> >
> > Here is the error injection test code for the above output:
> >  drivers/irqchip/irq-riscv-intc.c:
> >  static asmlinkage void riscv_intc_irq(struct pt_regs *regs)
> >  {
> >         unsigned long cause = regs->cause & ~CAUSE_IRQ_FLAG;
> > +       u32 tmp; __get_user(tmp, (u32 *)0);
> >
> > Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> > Signed-off-by: Guo Ren <guoren@kernel.org>
> > Cc: Palmer Dabbelt <palmer@rivosinc.com>
> > Cc: Changbin Du <changbin.du@intel.com>
> > ---
> >  arch/riscv/kernel/entry.S      | 2 +-
> >  arch/riscv/kernel/stacktrace.c | 9 +++++++++
> >  2 files changed, 10 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> > index b9eda3fcbd6d..329cf51fcd4d 100644
> > --- a/arch/riscv/kernel/entry.S
> > +++ b/arch/riscv/kernel/entry.S
> > @@ -248,7 +248,7 @@ ret_from_syscall_rejected:
> >       andi t0, t0, _TIF_SYSCALL_WORK
> >       bnez t0, handle_syscall_trace_exit
> >
> > -ret_from_exception:
> > +ENTRY(ret_from_exception)
>
> This at least needs an END(), but it should also be converted over to
Yes, I missed END() here.

> some non-function entry flavor.  I converted it over to
> SYM_CODE_START_NOALIGN(), with the corresponding SYM_CODE_END(), and put
> it on for-next.
Is that also for ret_from_fork & __switch_to?

>
> >       REG_L s0, PT_STATUS(sp)
> >       csrc CSR_STATUS, SR_IE
> >  #ifdef CONFIG_TRACE_IRQFLAGS
> > diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
> > index bcfe9eb55f80..75c8dd64fc48 100644
> > --- a/arch/riscv/kernel/stacktrace.c
> > +++ b/arch/riscv/kernel/stacktrace.c
> > @@ -16,6 +16,8 @@
> >
> >  #ifdef CONFIG_FRAME_POINTER
> >
> > +extern asmlinkage void ret_from_exception(void);
> > +
> >  void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
> >                            bool (*fn)(void *, unsigned long), void *arg)
> >  {
> > @@ -59,6 +61,13 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
> >                       fp = frame->fp;
> >                       pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
> >                                                  &frame->ra);
> > +                     if (pc == (unsigned long)ret_from_exception) {
> > +                             if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc)))
> > +                                     break;
> > +
> > +                             pc = ((struct pt_regs *)sp)->epc;
> > +                             fp = ((struct pt_regs *)sp)->s0;
> > +                     }
> >               }
> >
> >       }
  

Patch

diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index b9eda3fcbd6d..329cf51fcd4d 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -248,7 +248,7 @@  ret_from_syscall_rejected:
 	andi t0, t0, _TIF_SYSCALL_WORK
 	bnez t0, handle_syscall_trace_exit
 
-ret_from_exception:
+ENTRY(ret_from_exception)
 	REG_L s0, PT_STATUS(sp)
 	csrc CSR_STATUS, SR_IE
 #ifdef CONFIG_TRACE_IRQFLAGS
diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
index bcfe9eb55f80..75c8dd64fc48 100644
--- a/arch/riscv/kernel/stacktrace.c
+++ b/arch/riscv/kernel/stacktrace.c
@@ -16,6 +16,8 @@ 
 
 #ifdef CONFIG_FRAME_POINTER
 
+extern asmlinkage void ret_from_exception(void);
+
 void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
 			     bool (*fn)(void *, unsigned long), void *arg)
 {
@@ -59,6 +61,13 @@  void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
 			fp = frame->fp;
 			pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
 						   &frame->ra);
+			if (pc == (unsigned long)ret_from_exception) {
+				if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc)))
+					break;
+
+				pc = ((struct pt_regs *)sp)->epc;
+				fp = ((struct pt_regs *)sp)->s0;
+			}
 		}
 
 	}