[v3,5/8] tracing/fprobe: Enable fprobe events with CONFIG_DYNAMIC_FTRACE_WITH_ARGS

Message ID 169181865486.505132.6447946094827872988.stgit@devnote2
State New
Series bpf: fprobe: rethook: Use ftrace_regs instead of pt_regs

Commit Message

Masami Hiramatsu (Google) Aug. 12, 2023, 5:37 a.m. UTC
  From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

Allow fprobe events to be enabled with CONFIG_DYNAMIC_FTRACE_WITH_ARGS.
With this change, fprobe events mostly use ftrace_regs instead of pt_regs.
Note that if the arch doesn't enable HAVE_PT_REGS_COMPAT_FTRACE_REGS,
fprobe events can not be used from perf.
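
As background, a minimal sketch of the regs handling this relies on
(modeled on the generic fallback in include/linux/ftrace.h; not part of
this patch): when ftrace_regs carries a full pt_regs, ftrace_get_regs()
can hand the pt_regs back; otherwise it returns NULL and perf, which
consumes pt_regs, has nothing to work with.

	/* Generic/compat layout: ftrace_regs wraps a full pt_regs. */
	struct ftrace_regs {
		struct pt_regs regs;
	};
	#define arch_ftrace_get_regs(fregs) (&(fregs)->regs)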

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 Changes in v3:
   - introduce ftrace_regs_get_kernel_stack_nth().
   - fix typo.
---
 include/linux/ftrace.h          |   15 +++++++++
 kernel/trace/Kconfig            |    1 -
 kernel/trace/trace_fprobe.c     |   65 ++++++++++++++++++++-------------------
 kernel/trace/trace_probe_tmpl.h |    2 +
 4 files changed, 50 insertions(+), 33 deletions(-)
  

Comments

Florent Revest Aug. 17, 2023, 8:57 a.m. UTC | #1
On Sat, Aug 12, 2023 at 7:37 AM Masami Hiramatsu (Google)
<mhiramat@kernel.org> wrote:
>
> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> index d56304276318..6fb4ecf8767d 100644
> --- a/kernel/trace/Kconfig
> +++ b/kernel/trace/Kconfig
> @@ -679,7 +679,6 @@ config FPROBE_EVENTS
>         select TRACING
>         select PROBE_EVENTS
>         select DYNAMIC_EVENTS
> -       depends on DYNAMIC_FTRACE_WITH_REGS

I believe that, in practice, fprobe events still rely on WITH_REGS:

> diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
> index f440c97e050f..94c01dc061ec 100644
> --- a/kernel/trace/trace_fprobe.c
> +++ b/kernel/trace/trace_fprobe.c
> @@ -327,14 +328,15 @@ static int fentry_dispatcher(struct fprobe *fp, unsigned long entry_ip,
>         struct pt_regs *regs = ftrace_get_regs(fregs);

Because here you require that the entry handler gets ftrace_regs that
are full pt_regs.

>         int ret = 0;
>
> +       if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
> +               fentry_trace_func(tf, entry_ip, fregs);
> +
> +#ifdef CONFIG_PERF_EVENTS
>         if (!regs)
>                 return 0;
>
> -       if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
> -               fentry_trace_func(tf, entry_ip, regs);
> -#ifdef CONFIG_PERF_EVENTS
>         if (trace_probe_test_flag(&tf->tp, TP_FLAG_PROFILE))
> -               ret = fentry_perf_func(tf, entry_ip, regs);
> +               ret = fentry_perf_func(tf, entry_ip, fregs, regs);
>  #endif
>         return ret;
>  }
> @@ -347,14 +349,15 @@ static void fexit_dispatcher(struct fprobe *fp, unsigned long entry_ip,
>         struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp);
>         struct pt_regs *regs = ftrace_get_regs(fregs);

And same here with the return handler

I think fprobe events would need the same sort of refactoring as the
kprobe_multi bpf code: using ftrace_partial_regs() so they work on
!WITH_REGS builds.
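
A rough sketch of what that could look like, assuming the
ftrace_partial_regs() helper from the kprobe_multi/arm64 work and this
series' handler signature (illustration only, not a proposed patch):

	static int fentry_dispatcher(struct fprobe *fp, unsigned long entry_ip,
				     unsigned long ret_ip,
				     struct ftrace_regs *fregs, void *data)
	{
		struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp);
		int ret = 0;

		if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
			fentry_trace_func(tf, entry_ip, fregs);

	#ifdef CONFIG_PERF_EVENTS
		if (trace_probe_test_flag(&tf->tp, TP_FLAG_PROFILE)) {
			/* On-stack pt_regs just for illustration; it is
			 * filled only partially when ftrace_regs is not
			 * a full pt_regs. */
			struct pt_regs ptregs;
			struct pt_regs *regs = ftrace_partial_regs(fregs, &ptregs);

			ret = fentry_perf_func(tf, entry_ip, fregs, regs);
		}
	#endif
		return ret;
	}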
  
Masami Hiramatsu (Google) Aug. 18, 2023, 11:11 a.m. UTC | #2
On Thu, 17 Aug 2023 10:57:50 +0200
Florent Revest <revest@chromium.org> wrote:

> On Sat, Aug 12, 2023 at 7:37 AM Masami Hiramatsu (Google)
> <mhiramat@kernel.org> wrote:
> >
> > diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> > index d56304276318..6fb4ecf8767d 100644
> > --- a/kernel/trace/Kconfig
> > +++ b/kernel/trace/Kconfig
> > @@ -679,7 +679,6 @@ config FPROBE_EVENTS
> >         select TRACING
> >         select PROBE_EVENTS
> >         select DYNAMIC_EVENTS
> > -       depends on DYNAMIC_FTRACE_WITH_REGS
> 
> I believe that, in practice, fprobe events still rely on WITH_REGS:
> 
> > diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
> > index f440c97e050f..94c01dc061ec 100644
> > --- a/kernel/trace/trace_fprobe.c
> > +++ b/kernel/trace/trace_fprobe.c
> > @@ -327,14 +328,15 @@ static int fentry_dispatcher(struct fprobe *fp, unsigned long entry_ip,
> >         struct pt_regs *regs = ftrace_get_regs(fregs);
> 
> Because here you require that the entry handler gets ftrace_regs that
> are full pt_regs.

Ah, that is for perf events. Yes, that is the problematic point.
Since perf's interfaces depend on pt_regs (especially for stacktraces),
I can not remove this part. This is the next issue to be solved.
Maybe we can use a partial pt_regs for stack tracing, so we can swap the
order of the patches to introduce ftrace_partial_regs() before this one
and use it for the perf events.

> 
> >         int ret = 0;
> >
> > +       if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
> > +               fentry_trace_func(tf, entry_ip, fregs);
> > +
> > +#ifdef CONFIG_PERF_EVENTS
> >         if (!regs)
> >                 return 0;
> >
> > -       if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
> > -               fentry_trace_func(tf, entry_ip, regs);
> > -#ifdef CONFIG_PERF_EVENTS
> >         if (trace_probe_test_flag(&tf->tp, TP_FLAG_PROFILE))
> > -               ret = fentry_perf_func(tf, entry_ip, regs);
> > +               ret = fentry_perf_func(tf, entry_ip, fregs, regs);
> >  #endif
> >         return ret;
> >  }
> > @@ -347,14 +349,15 @@ static void fexit_dispatcher(struct fprobe *fp, unsigned long entry_ip,
> >         struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp);
> >         struct pt_regs *regs = ftrace_get_regs(fregs);
> 
> And same here with the return handler
> 
> I think fprobe events would need the same sort of refactoring as the
> kprobe_multi bpf code: using ftrace_partial_regs() so they work on
> !WITH_REGS builds.

Actually, kprobe_multi uses fprobe directly, so this is not related to
the bpf part.

Thank you,
  
Masami Hiramatsu (Google) Aug. 18, 2023, 12:09 p.m. UTC | #3
On Thu, 17 Aug 2023 13:44:41 -0700
Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:

> On Fri, Aug 11, 2023 at 10:37 PM Masami Hiramatsu (Google)
> <mhiramat@kernel.org> wrote:
> >
> > +#ifdef CONFIG_HAVE_REGS_AND_STACK_ACCESS_API
> > +static __always_inline unsigned long
> > +ftrace_regs_get_kernel_stack_nth(struct ftrace_regs *fregs, unsigned int nth)
> > +{
> > +       unsigned long *stackp;
> > +
> > +       stackp = (unsigned long *)ftrace_regs_get_stack_pointer(fregs);
> > +       if (((unsigned long)(stackp + nth) & ~(THREAD_SIZE - 1)) ==
> > +           ((unsigned long)stackp & ~(THREAD_SIZE - 1)))
> > +               return *(stackp + nth);
> > +
> > +       return 0;
> > +}
> > +#endif /* CONFIG_HAVE_REGS_AND_STACK_ACCESS_API */
> ...
> >
> > @@ -140,17 +140,17 @@ process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
> >         /* 1st stage: get value from context */
> >         switch (code->op) {
> >         case FETCH_OP_STACK:
> > -               val = regs_get_kernel_stack_nth(regs, code->param);
> > +               val = ftrace_regs_get_kernel_stack_nth(fregs, code->param);
> >                 break;
> 
> Just noticed that bit.
> You probably want to document that access to arguments, and
> especially arguments on the stack, is not precise. It's "best effort".
> The x86-64 calling convention is not as simple as it might appear.
> For example, if the 6th argument is a 16-byte struct like sockptr_t, it
> will be passed on the stack and the 7th actual argument (if it's <= 8
> bytes) will be in a register.

Yes, that's right. To access the arguments precisely, the easiest way is
to use DWARF (e.g. perf probe), and another way is to reconstruct the ABI
from type information such as BTF.
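
To make that concrete, a hypothetical x86-64 prototype (SysV ABI
assumed; all names invented for illustration):

	struct two_words { long lo, hi; };	/* 16 bytes, like sockptr_t */

	/* a1..a5 occupy %rdi, %rsi, %rdx, %rcx and %r8.  The 16-byte
	 * struct needs two register slots, but only %r9 is free, so it
	 * goes on the stack instead, and a7 is promoted into %r9.  A
	 * fetcher that assumes "argument 7 lives in the first stack
	 * slot" would read half of the struct instead. */
	long example_fn(long a1, long a2, long a3, long a4, long a5,
			struct two_words w, long a7);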

> 
> Things are similar on 32-bit, and there is a non-zero chance that
> regs_get_kernel_argument() doesn't return the actual arg.

Yeah, it is hard to get the actual argument...

Let me update the documentation.
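
One point worth documenting: the new ftrace_regs_get_kernel_stack_nth()
is itself best-effort. It relies on kernel stacks being THREAD_SIZE-sized
and THREAD_SIZE-aligned, so masking an address with ~(THREAD_SIZE - 1)
yields the base of the stack it lives on. A worked example (addresses
invented, THREAD_SIZE assumed to be 16 KiB = 0x4000; STACK_BASE() is
just shorthand for the mask the helper applies):

	#define STACK_BASE(p)	((unsigned long)(p) & ~(THREAD_SIZE - 1))

	/* stackp     = 0xffffc90000003fe8 -> base 0xffffc90000000000
	 * stackp + 2 = 0xffffc90000003ff8 -> same base: the slot is on
	 *                                    this stack, dereference it
	 * stackp + 4 = 0xffffc90000004008 -> base 0xffffc90000004000:
	 *                                    off the stack, return 0  */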

Thank you,
  

Patch

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index fe335d861f08..3b3a0b1f592f 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -171,6 +171,21 @@  static __always_inline bool ftrace_regs_has_args(struct ftrace_regs *fregs)
 	return ftrace_get_regs(fregs) != NULL;
 }
 
+#ifdef CONFIG_HAVE_REGS_AND_STACK_ACCESS_API
+static __always_inline unsigned long
+ftrace_regs_get_kernel_stack_nth(struct ftrace_regs *fregs, unsigned int nth)
+{
+	unsigned long *stackp;
+
+	stackp = (unsigned long *)ftrace_regs_get_stack_pointer(fregs);
+	if (((unsigned long)(stackp + nth) & ~(THREAD_SIZE - 1)) ==
+	    ((unsigned long)stackp & ~(THREAD_SIZE - 1)))
+		return *(stackp + nth);
+
+	return 0;
+}
+#endif /* CONFIG_HAVE_REGS_AND_STACK_ACCESS_API */
+
 #ifdef CONFIG_FUNCTION_TRACER
 
 extern int ftrace_enabled;
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d56304276318..6fb4ecf8767d 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -679,7 +679,6 @@  config FPROBE_EVENTS
 	select TRACING
 	select PROBE_EVENTS
 	select DYNAMIC_EVENTS
-	depends on DYNAMIC_FTRACE_WITH_REGS
 	default y
 	help
 	  This allows user to add tracing events on the function entry and
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index f440c97e050f..94c01dc061ec 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -132,7 +132,7 @@  static int
 process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
 		   void *base)
 {
-	struct pt_regs *regs = rec;
+	struct ftrace_regs *fregs = rec;
 	unsigned long val;
 	int ret;
 
@@ -140,17 +140,17 @@  process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
 	/* 1st stage: get value from context */
 	switch (code->op) {
 	case FETCH_OP_STACK:
-		val = regs_get_kernel_stack_nth(regs, code->param);
+		val = ftrace_regs_get_kernel_stack_nth(fregs, code->param);
 		break;
 	case FETCH_OP_STACKP:
-		val = kernel_stack_pointer(regs);
+		val = ftrace_regs_get_stack_pointer(fregs);
 		break;
 	case FETCH_OP_RETVAL:
-		val = regs_return_value(regs);
+		val = ftrace_regs_return_value(fregs);
 		break;
 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
 	case FETCH_OP_ARG:
-		val = regs_get_kernel_argument(regs, code->param);
+		val = ftrace_regs_get_argument(fregs, code->param);
 		break;
 #endif
 	case FETCH_NOP_SYMBOL:	/* Ignore a place holder */
@@ -170,7 +170,7 @@  NOKPROBE_SYMBOL(process_fetch_insn)
 /* function entry handler */
 static nokprobe_inline void
 __fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
-		    struct pt_regs *regs,
+		    struct ftrace_regs *fregs,
 		    struct trace_event_file *trace_file)
 {
 	struct fentry_trace_entry_head *entry;
@@ -184,36 +184,36 @@  __fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
 	if (trace_trigger_soft_disabled(trace_file))
 		return;
 
-	dsize = __get_data_size(&tf->tp, regs);
+	dsize = __get_data_size(&tf->tp, fregs);
 
 	entry = trace_event_buffer_reserve(&fbuffer, trace_file,
 					   sizeof(*entry) + tf->tp.size + dsize);
 	if (!entry)
 		return;
 
-	fbuffer.regs = regs;
+	fbuffer.regs = ftrace_get_regs(fregs);
 	entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
 	entry->ip = entry_ip;
-	store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize);
+	store_trace_args(&entry[1], &tf->tp, fregs, sizeof(*entry), dsize);
 
 	trace_event_buffer_commit(&fbuffer);
 }
 
 static void
 fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
-		  struct pt_regs *regs)
+		  struct ftrace_regs *fregs)
 {
 	struct event_file_link *link;
 
 	trace_probe_for_each_link_rcu(link, &tf->tp)
-		__fentry_trace_func(tf, entry_ip, regs, link->file);
+		__fentry_trace_func(tf, entry_ip, fregs, link->file);
 }
 NOKPROBE_SYMBOL(fentry_trace_func);
 
 /* Kretprobe handler */
 static nokprobe_inline void
 __fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
-		   unsigned long ret_ip, struct pt_regs *regs,
+		   unsigned long ret_ip, struct ftrace_regs *fregs,
 		   struct trace_event_file *trace_file)
 {
 	struct fexit_trace_entry_head *entry;
@@ -227,37 +227,37 @@  __fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
 	if (trace_trigger_soft_disabled(trace_file))
 		return;
 
-	dsize = __get_data_size(&tf->tp, regs);
+	dsize = __get_data_size(&tf->tp, fregs);
 
 	entry = trace_event_buffer_reserve(&fbuffer, trace_file,
 					   sizeof(*entry) + tf->tp.size + dsize);
 	if (!entry)
 		return;
 
-	fbuffer.regs = regs;
+	fbuffer.regs = ftrace_get_regs(fregs);
 	entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
 	entry->func = entry_ip;
 	entry->ret_ip = ret_ip;
-	store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize);
+	store_trace_args(&entry[1], &tf->tp, fregs, sizeof(*entry), dsize);
 
 	trace_event_buffer_commit(&fbuffer);
 }
 
 static void
 fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
-		 unsigned long ret_ip, struct pt_regs *regs)
+		 unsigned long ret_ip, struct ftrace_regs *fregs)
 {
 	struct event_file_link *link;
 
 	trace_probe_for_each_link_rcu(link, &tf->tp)
-		__fexit_trace_func(tf, entry_ip, ret_ip, regs, link->file);
+		__fexit_trace_func(tf, entry_ip, ret_ip, fregs, link->file);
 }
 NOKPROBE_SYMBOL(fexit_trace_func);
 
 #ifdef CONFIG_PERF_EVENTS
 
 static int fentry_perf_func(struct trace_fprobe *tf, unsigned long entry_ip,
-			    struct pt_regs *regs)
+			    struct ftrace_regs *fregs, struct pt_regs *regs)
 {
 	struct trace_event_call *call = trace_probe_event_call(&tf->tp);
 	struct fentry_trace_entry_head *entry;
@@ -269,7 +269,7 @@  static int fentry_perf_func(struct trace_fprobe *tf, unsigned long entry_ip,
 	if (hlist_empty(head))
 		return 0;
 
-	dsize = __get_data_size(&tf->tp, regs);
+	dsize = __get_data_size(&tf->tp, fregs);
 	__size = sizeof(*entry) + tf->tp.size + dsize;
 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
@@ -280,7 +280,7 @@  static int fentry_perf_func(struct trace_fprobe *tf, unsigned long entry_ip,
 
 	entry->ip = entry_ip;
 	memset(&entry[1], 0, dsize);
-	store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize);
+	store_trace_args(&entry[1], &tf->tp, fregs, sizeof(*entry), dsize);
 	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
 			      head, NULL);
 	return 0;
@@ -289,7 +289,8 @@  NOKPROBE_SYMBOL(fentry_perf_func);
 
 static void
 fexit_perf_func(struct trace_fprobe *tf, unsigned long entry_ip,
-		unsigned long ret_ip, struct pt_regs *regs)
+		unsigned long ret_ip, struct ftrace_regs *fregs,
+		struct pt_regs *regs)
 {
 	struct trace_event_call *call = trace_probe_event_call(&tf->tp);
 	struct fexit_trace_entry_head *entry;
@@ -301,7 +302,7 @@  fexit_perf_func(struct trace_fprobe *tf, unsigned long entry_ip,
 	if (hlist_empty(head))
 		return;
 
-	dsize = __get_data_size(&tf->tp, regs);
+	dsize = __get_data_size(&tf->tp, fregs);
 	__size = sizeof(*entry) + tf->tp.size + dsize;
 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
@@ -312,7 +313,7 @@  fexit_perf_func(struct trace_fprobe *tf, unsigned long entry_ip,
 
 	entry->func = entry_ip;
 	entry->ret_ip = ret_ip;
-	store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize);
+	store_trace_args(&entry[1], &tf->tp, fregs, sizeof(*entry), dsize);
 	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
 			      head, NULL);
 }
@@ -327,14 +328,15 @@  static int fentry_dispatcher(struct fprobe *fp, unsigned long entry_ip,
 	struct pt_regs *regs = ftrace_get_regs(fregs);
 	int ret = 0;
 
+	if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
+		fentry_trace_func(tf, entry_ip, fregs);
+
+#ifdef CONFIG_PERF_EVENTS
 	if (!regs)
 		return 0;
 
-	if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
-		fentry_trace_func(tf, entry_ip, regs);
-#ifdef CONFIG_PERF_EVENTS
 	if (trace_probe_test_flag(&tf->tp, TP_FLAG_PROFILE))
-		ret = fentry_perf_func(tf, entry_ip, regs);
+		ret = fentry_perf_func(tf, entry_ip, fregs, regs);
 #endif
 	return ret;
 }
@@ -347,14 +349,15 @@  static void fexit_dispatcher(struct fprobe *fp, unsigned long entry_ip,
 	struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp);
 	struct pt_regs *regs = ftrace_get_regs(fregs);
 
+	if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
+		fexit_trace_func(tf, entry_ip, ret_ip, fregs);
+
+#ifdef CONFIG_PERF_EVENTS
 	if (!regs)
 		return;
 
-	if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
-		fexit_trace_func(tf, entry_ip, ret_ip, regs);
-#ifdef CONFIG_PERF_EVENTS
 	if (trace_probe_test_flag(&tf->tp, TP_FLAG_PROFILE))
-		fexit_perf_func(tf, entry_ip, ret_ip, regs);
+		fexit_perf_func(tf, entry_ip, ret_ip, fregs, regs);
 #endif
 }
 NOKPROBE_SYMBOL(fexit_dispatcher);
diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h
index 3935b347f874..05445a745a07 100644
--- a/kernel/trace/trace_probe_tmpl.h
+++ b/kernel/trace/trace_probe_tmpl.h
@@ -232,7 +232,7 @@  process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val,
 
 /* Sum up total data length for dynamic arrays (strings) */
 static nokprobe_inline int
-__get_data_size(struct trace_probe *tp, struct pt_regs *regs)
+__get_data_size(struct trace_probe *tp, void *regs)
 {
 	struct probe_arg *arg;
 	int i, len, ret = 0;