[v2,2/4] fprobe: make fprobe_kprobe_handler recursion free

Message ID 20230516071830.8190-3-zegao@tencent.com
State New
Headers
Series Make fprobe + rethook immune to recursion |

Commit Message

Ze Gao May 16, 2023, 7:18 a.m. UTC
  Current implementation calls kprobe related functions before doing
ftrace recursion check in fprobe_kprobe_handler, which opens door
to kernel crash due to stack recursion if preempt_count_{add, sub}
is traceable.

Refactor the common part out of fprobe_kprobe_handler and fprobe_
handler and call ftrace recursion detection at the very beginning,
so that the whole fprobe_kprobe_handler is free from recursion.

Signed-off-by: Ze Gao <zegao@tencent.com>
---
 kernel/trace/fprobe.c | 59 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 44 insertions(+), 15 deletions(-)
  

Comments

Peter Zijlstra May 16, 2023, 9:15 a.m. UTC | #1
On Tue, May 16, 2023 at 03:18:28PM +0800, Ze Gao wrote:

> +static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> +		struct ftrace_ops *ops, struct ftrace_regs *fregs)
> +{
> +	struct fprobe *fp;
> +	int bit;
> +
> +	fp = container_of(ops, struct fprobe, ops);
> +	if (fprobe_disabled(fp))
> +		return;
> +
> +	/* recursion detection has to go before any traceable function and
> +	 * all functions before this point should be marked as notrace
> +	 */
> +	bit = ftrace_test_recursion_trylock(ip, parent_ip);
> +	if (bit < 0) {
> +		fp->nmissed++;
> +		return;
> +	}
> +	__fprobe_handler(ip, parent_ip, ops, fregs);
>  	ftrace_test_recursion_unlock(bit);
> +
>  }
>  NOKPROBE_SYMBOL(fprobe_handler);
>  
>  static void fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
>  				  struct ftrace_ops *ops, struct ftrace_regs *fregs)
>  {
> -	struct fprobe *fp = container_of(ops, struct fprobe, ops);
> +	struct fprobe *fp;
> +	int bit;
> +
> +	fp = container_of(ops, struct fprobe, ops);
> +	if (fprobe_disabled(fp))
> +		return;
> +
> +	/* recursion detection has to go before any traceable function and
> +	 * all functions called before this point should be marked as notrace
> +	 */
> +	bit = ftrace_test_recursion_trylock(ip, parent_ip);
> +	if (bit < 0) {
> +		fp->nmissed++;
> +		return;
> +	}

Please don't use this comment style; multi line comments go like:

	/*
	 * Multi line comment ...
	 *                    ... is symmetric.
	 */

Same for your next patch.
  
Peter Zijlstra May 16, 2023, 9:18 a.m. UTC | #2
On Tue, May 16, 2023 at 03:18:28PM +0800, Ze Gao wrote:
> Current implementation calls kprobe related functions before doing
> ftrace recursion check in fprobe_kprobe_handler, which opens door
> to kernel crash due to stack recursion if preempt_count_{add, sub}
> is traceable.

Which preempt_count*() are you referring to? The ones you just made
_notrace in the previous patch?
  
Ze Gao May 16, 2023, 9:35 a.m. UTC | #3
Thanks for pointing this out,  I'll get it all fixed ASAP.

Regards,
Ze

On Tue, May 16, 2023 at 5:16 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Tue, May 16, 2023 at 03:18:28PM +0800, Ze Gao wrote:
>
> > +static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> > +             struct ftrace_ops *ops, struct ftrace_regs *fregs)
> > +{
> > +     struct fprobe *fp;
> > +     int bit;
> > +
> > +     fp = container_of(ops, struct fprobe, ops);
> > +     if (fprobe_disabled(fp))
> > +             return;
> > +
> > +     /* recursion detection has to go before any traceable function and
> > +      * all functions before this point should be marked as notrace
> > +      */
> > +     bit = ftrace_test_recursion_trylock(ip, parent_ip);
> > +     if (bit < 0) {
> > +             fp->nmissed++;
> > +             return;
> > +     }
> > +     __fprobe_handler(ip, parent_ip, ops, fregs);
> >       ftrace_test_recursion_unlock(bit);
> > +
> >  }
> >  NOKPROBE_SYMBOL(fprobe_handler);
> >
> >  static void fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
> >                                 struct ftrace_ops *ops, struct ftrace_regs *fregs)
> >  {
> > -     struct fprobe *fp = container_of(ops, struct fprobe, ops);
> > +     struct fprobe *fp;
> > +     int bit;
> > +
> > +     fp = container_of(ops, struct fprobe, ops);
> > +     if (fprobe_disabled(fp))
> > +             return;
> > +
> > +     /* recursion detection has to go before any traceable function and
> > +      * all functions called before this point should be marked as notrace
> > +      */
> > +     bit = ftrace_test_recursion_trylock(ip, parent_ip);
> > +     if (bit < 0) {
> > +             fp->nmissed++;
> > +             return;
> > +     }
>
> Please don't use this comment style; multi line comments go like:
>
>         /*
>          * Multi line comment ...
>          *                    ... is symmetric.
>          */
>
> Same for your next patch.
  
Ze Gao May 16, 2023, 9:47 a.m. UTC | #4
Precisely, these that are called within kprobe_busy_{begin, end},
which the previous patch does not resolve.
I will refine the commit message to make it clear.

FYI, details can checked out here:
    Link: https://lore.kernel.org/linux-trace-kernel/20230516132516.c902edcf21028874a74fb868@kernel.org/

Regards,
Ze

On Tue, May 16, 2023 at 5:18 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Tue, May 16, 2023 at 03:18:28PM +0800, Ze Gao wrote:
> > Current implementation calls kprobe related functions before doing
> > ftrace recursion check in fprobe_kprobe_handler, which opens door
> > to kernel crash due to stack recursion if preempt_count_{add, sub}
> > is traceable.
>
> Which preempt_count*() are you referring to? The ones you just made
> _notrace in the previous patch?
  
Ze Gao May 16, 2023, 9:51 a.m. UTC | #5
Sorry for paste the wrong link, it's this one instead:
  Link: https://lore.kernel.org/bpf/20230513001757.75ae0d1b@rorschach.local.home/

It's the original discussions of this problem.

Regards,
Ze

On Tue, May 16, 2023 at 5:47 PM Ze Gao <zegao2021@gmail.com> wrote:
>
> Precisely, these that are called within kprobe_busy_{begin, end},
> which the previous patch does not resolve.
> I will refine the commit message to make it clear.
>
> FYI, details can checked out here:
>     Link: https://lore.kernel.org/linux-trace-kernel/20230516132516.c902edcf21028874a74fb868@kernel.org/
>
> Regards,
> Ze
>
> On Tue, May 16, 2023 at 5:18 PM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > On Tue, May 16, 2023 at 03:18:28PM +0800, Ze Gao wrote:
> > > Current implementation calls kprobe related functions before doing
> > > ftrace recursion check in fprobe_kprobe_handler, which opens door
> > > to kernel crash due to stack recursion if preempt_count_{add, sub}
> > > is traceable.
> >
> > Which preempt_count*() are you referring to? The ones you just made
> > _notrace in the previous patch?
  
Masami Hiramatsu (Google) May 16, 2023, 4:03 p.m. UTC | #6
On Tue, 16 May 2023 17:47:52 +0800
Ze Gao <zegao2021@gmail.com> wrote:

> Precisely, these that are called within kprobe_busy_{begin, end},
> which the previous patch does not resolve.

Note that kprobe_busy_{begin,end} don't need to use notrace version
because kprobe itself prohibits probing on preempt_count_{add,sub}.

Thank you,

> I will refine the commit message to make it clear.
> 
> FYI, details can checked out here:
>     Link: https://lore.kernel.org/linux-trace-kernel/20230516132516.c902edcf21028874a74fb868@kernel.org/
> 
> Regards,
> Ze
> 
> On Tue, May 16, 2023 at 5:18 PM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > On Tue, May 16, 2023 at 03:18:28PM +0800, Ze Gao wrote:
> > > Current implementation calls kprobe related functions before doing
> > > ftrace recursion check in fprobe_kprobe_handler, which opens door
> > > to kernel crash due to stack recursion if preempt_count_{add, sub}
> > > is traceable.
> >
> > Which preempt_count*() are you referring to? The ones you just made
> > _notrace in the previous patch?
  
Ze Gao May 17, 2023, 1:54 a.m. UTC | #7
Oops, I misunderstood your comments before.

Yes, it's not necessary to do this reordering as regards to kprobe.

Thanks for your review.

I'll rebase onto the latest tree and send v3 ASAP.

Regards,
Ze

On Wed, May 17, 2023 at 12:03 AM Masami Hiramatsu <mhiramat@kernel.org> wrote:
>
> On Tue, 16 May 2023 17:47:52 +0800
> Ze Gao <zegao2021@gmail.com> wrote:
>
> > Precisely, these that are called within kprobe_busy_{begin, end},
> > which the previous patch does not resolve.
>
> Note that kprobe_busy_{begin,end} don't need to use notrace version
> because kprobe itself prohibits probing on preempt_count_{add,sub}.
>
> Thank you,
>
> > I will refine the commit message to make it clear.
> >
> > FYI, details can checked out here:
> >     Link: https://lore.kernel.org/linux-trace-kernel/20230516132516.c902edcf21028874a74fb868@kernel.org/
> >
> > Regards,
> > Ze
> >
> > On Tue, May 16, 2023 at 5:18 PM Peter Zijlstra <peterz@infradead.org> wrote:
> > >
> > > On Tue, May 16, 2023 at 03:18:28PM +0800, Ze Gao wrote:
> > > > Current implementation calls kprobe related functions before doing
> > > > ftrace recursion check in fprobe_kprobe_handler, which opens door
> > > > to kernel crash due to stack recursion if preempt_count_{add, sub}
> > > > is traceable.
> > >
> > > Which preempt_count*() are you referring to? The ones you just made
> > > _notrace in the previous patch?
>
>
> --
> Masami Hiramatsu (Google) <mhiramat@kernel.org>
  
Masami Hiramatsu (Google) May 17, 2023, 2:54 a.m. UTC | #8
On Wed, 17 May 2023 09:54:53 +0800
Ze Gao <zegao2021@gmail.com> wrote:

> Oops, I misunderstood your comments before.
> 
> Yes, it's not necessary to do this reordering as regards to kprobe.

Let me confirm, I meant that your current patch is correct. I just mentioned
that kprobe_busy_{begin,end} will continue use standard version because
kprobe itself handles that. Please update only the patch description and
add my ack.

Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>

If you add Steve's call graph for the explanation, it will help us to
understand what will be fixed.

Thank you,

> 
> Thanks for your review.
> 
> I'll rebase onto the latest tree and send v3 ASAP.
> 
> Regards,
> Ze
> 
> On Wed, May 17, 2023 at 12:03 AM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> >
> > On Tue, 16 May 2023 17:47:52 +0800
> > Ze Gao <zegao2021@gmail.com> wrote:
> >
> > > Precisely, these that are called within kprobe_busy_{begin, end},
> > > which the previous patch does not resolve.
> >
> > Note that kprobe_busy_{begin,end} don't need to use notrace version
> > because kprobe itself prohibits probing on preempt_count_{add,sub}.
> >
> > Thank you,
> >
> > > I will refine the commit message to make it clear.
> > >
> > > FYI, details can checked out here:
> > >     Link: https://lore.kernel.org/linux-trace-kernel/20230516132516.c902edcf21028874a74fb868@kernel.org/
> > >
> > > Regards,
> > > Ze
> > >
> > > On Tue, May 16, 2023 at 5:18 PM Peter Zijlstra <peterz@infradead.org> wrote:
> > > >
> > > > On Tue, May 16, 2023 at 03:18:28PM +0800, Ze Gao wrote:
> > > > > Current implementation calls kprobe related functions before doing
> > > > > ftrace recursion check in fprobe_kprobe_handler, which opens door
> > > > > to kernel crash due to stack recursion if preempt_count_{add, sub}
> > > > > is traceable.
> > > >
> > > > Which preempt_count*() are you referring to? The ones you just made
> > > > _notrace in the previous patch?
> >
> >
> > --
> > Masami Hiramatsu (Google) <mhiramat@kernel.org>
  
Ze Gao May 17, 2023, 3:10 a.m. UTC | #9
Got it! :)

I will improve the commit message and send v3 ASAP.

BTW, which tree should I rebase those patches onto? Is that the
for-next branch of
git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git. I saw
Jiri had troubles
applying those since these works are based on v6.4.0.

THX,
Ze

On Wed, May 17, 2023 at 10:54 AM Masami Hiramatsu <mhiramat@kernel.org> wrote:
>
> On Wed, 17 May 2023 09:54:53 +0800
> Ze Gao <zegao2021@gmail.com> wrote:
>
> > Oops, I misunderstood your comments before.
> >
> > Yes, it's not necessary to do this reordering as regards to kprobe.
>
> Let me confirm, I meant that your current patch is correct. I just mentioned
> that kprobe_busy_{begin,end} will continue use standard version because
> kprobe itself handles that. Please update only the patch description and
> add my ack.
>
> Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
>
> If you add Steve's call graph for the explanation, it will help us to
> understand what will be fixed.
>
> Thank you,
>
> >
> > Thanks for your review.
> >
> > I'll rebase onto the latest tree and send v3 ASAP.
> >
> > Regards,
> > Ze
> >
> > On Wed, May 17, 2023 at 12:03 AM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> > >
> > > On Tue, 16 May 2023 17:47:52 +0800
> > > Ze Gao <zegao2021@gmail.com> wrote:
> > >
> > > > Precisely, these that are called within kprobe_busy_{begin, end},
> > > > which the previous patch does not resolve.
> > >
> > > Note that kprobe_busy_{begin,end} don't need to use notrace version
> > > because kprobe itself prohibits probing on preempt_count_{add,sub}.
> > >
> > > Thank you,
> > >
> > > > I will refine the commit message to make it clear.
> > > >
> > > > FYI, details can checked out here:
> > > >     Link: https://lore.kernel.org/linux-trace-kernel/20230516132516.c902edcf21028874a74fb868@kernel.org/
> > > >
> > > > Regards,
> > > > Ze
> > > >
> > > > On Tue, May 16, 2023 at 5:18 PM Peter Zijlstra <peterz@infradead.org> wrote:
> > > > >
> > > > > On Tue, May 16, 2023 at 03:18:28PM +0800, Ze Gao wrote:
> > > > > > Current implementation calls kprobe related functions before doing
> > > > > > ftrace recursion check in fprobe_kprobe_handler, which opens door
> > > > > > to kernel crash due to stack recursion if preempt_count_{add, sub}
> > > > > > is traceable.
> > > > >
> > > > > Which preempt_count*() are you referring to? The ones you just made
> > > > > _notrace in the previous patch?
> > >
> > >
> > > --
> > > Masami Hiramatsu (Google) <mhiramat@kernel.org>
>
>
> --
> Masami Hiramatsu (Google) <mhiramat@kernel.org>
  
Steven Rostedt May 17, 2023, 3:25 a.m. UTC | #10
On Wed, 17 May 2023 11:10:21 +0800
Ze Gao <zegao2021@gmail.com> wrote:

> Got it! :)
> 
> I will improve the commit message and send v3 ASAP.
> 
> BTW, which tree should I rebase those patches onto? Is that the
> for-next branch of
> git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git. I saw
> Jiri had troubles
> applying those since these works are based on v6.4.0.
> 

You can submit against 6.4-rc1. We haven't updated the for-next branch
yet. Which will be rebased off of one of the 6.4 rc's.

-- Steve
  
Masami Hiramatsu (Google) May 17, 2023, 3:51 a.m. UTC | #11
On Tue, 16 May 2023 23:25:45 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Wed, 17 May 2023 11:10:21 +0800
> Ze Gao <zegao2021@gmail.com> wrote:
> 
> > Got it! :)
> > 
> > I will improve the commit message and send v3 ASAP.
> > 
> > BTW, which tree should I rebase those patches onto? Is that the
> > for-next branch of
> > git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git. I saw
> > Jiri had troubles
> > applying those since these works are based on v6.4.0.
> > 
> 
> You can submit against 6.4-rc1. We haven't updated the for-next branch
> yet. Which will be rebased off of one of the 6.4 rc's.

Yeah, I would like to pick it to probes/fixes for 6.4 and stable branches.

Thank you,
  
Ze Gao May 17, 2023, 4:51 a.m. UTC | #12
Hi Jiri,
This is the latest version against 6.4-rc1, and you can apply without trouble.
     https://lore.kernel.org/linux-trace-kernel/20230517034510.15639-1-zegao@tencent.com/T/#t

Regards,
Ze

On Wed, May 17, 2023 at 11:25 AM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Wed, 17 May 2023 11:10:21 +0800
> Ze Gao <zegao2021@gmail.com> wrote:
>
> > Got it! :)
> >
> > I will improve the commit message and send v3 ASAP.
> >
> > BTW, which tree should I rebase those patches onto? Is that the
> > for-next branch of
> > git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git. I saw
> > Jiri had troubles
> > applying those since these works are based on v6.4.0.
> >
>
> You can submit against 6.4-rc1. We haven't updated the for-next branch
> yet. Which will be rebased off of one of the 6.4 rc's.
>
> -- Steve
  

Patch

diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 9abb3905bc8e..097c740799ba 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -20,30 +20,22 @@  struct fprobe_rethook_node {
 	char data[];
 };
 
-static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
-			   struct ftrace_ops *ops, struct ftrace_regs *fregs)
+static inline void __fprobe_handler(unsigned long ip, unsigned long
+		parent_ip, struct ftrace_ops *ops, struct ftrace_regs *fregs)
 {
 	struct fprobe_rethook_node *fpr;
 	struct rethook_node *rh = NULL;
 	struct fprobe *fp;
 	void *entry_data = NULL;
-	int bit, ret;
+	int ret;
 
 	fp = container_of(ops, struct fprobe, ops);
-	if (fprobe_disabled(fp))
-		return;
-
-	bit = ftrace_test_recursion_trylock(ip, parent_ip);
-	if (bit < 0) {
-		fp->nmissed++;
-		return;
-	}
 
 	if (fp->exit_handler) {
 		rh = rethook_try_get(fp->rethook);
 		if (!rh) {
 			fp->nmissed++;
-			goto out;
+			return;
 		}
 		fpr = container_of(rh, struct fprobe_rethook_node, node);
 		fpr->entry_ip = ip;
@@ -61,23 +53,60 @@  static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
 		else
 			rethook_hook(rh, ftrace_get_regs(fregs), true);
 	}
-out:
+}
+
+static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
+		struct ftrace_ops *ops, struct ftrace_regs *fregs)
+{
+	struct fprobe *fp;
+	int bit;
+
+	fp = container_of(ops, struct fprobe, ops);
+	if (fprobe_disabled(fp))
+		return;
+
+	/* recursion detection has to go before any traceable function and
+	 * all functions before this point should be marked as notrace
+	 */
+	bit = ftrace_test_recursion_trylock(ip, parent_ip);
+	if (bit < 0) {
+		fp->nmissed++;
+		return;
+	}
+	__fprobe_handler(ip, parent_ip, ops, fregs);
 	ftrace_test_recursion_unlock(bit);
+
 }
 NOKPROBE_SYMBOL(fprobe_handler);
 
 static void fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
 				  struct ftrace_ops *ops, struct ftrace_regs *fregs)
 {
-	struct fprobe *fp = container_of(ops, struct fprobe, ops);
+	struct fprobe *fp;
+	int bit;
+
+	fp = container_of(ops, struct fprobe, ops);
+	if (fprobe_disabled(fp))
+		return;
+
+	/* recursion detection has to go before any traceable function and
+	 * all functions called before this point should be marked as notrace
+	 */
+	bit = ftrace_test_recursion_trylock(ip, parent_ip);
+	if (bit < 0) {
+		fp->nmissed++;
+		return;
+	}
 
 	if (unlikely(kprobe_running())) {
 		fp->nmissed++;
 		return;
 	}
+
 	kprobe_busy_begin();
-	fprobe_handler(ip, parent_ip, ops, fregs);
+	__fprobe_handler(ip, parent_ip, ops, fregs);
 	kprobe_busy_end();
+	ftrace_test_recursion_unlock(bit);
 }
 
 static void fprobe_exit_handler(struct rethook_node *rh, void *data,