[PATCHv2] ftrace: Show all functions with addresses in available_filter_functions_addrs

Message ID 20230611130029.1202298-1-jolsa@kernel.org
State New
Headers
Series [PATCHv2] ftrace: Show all functions with addresses in available_filter_functions_addrs |

Commit Message

Jiri Olsa June 11, 2023, 1 p.m. UTC
  Adding new available_filter_functions_addrs file that shows all available
functions (same as available_filter_functions) together with addresses,
like:

  # cat available_filter_functions_addrs | head
  ffffffff81000770 __traceiter_initcall_level
  ffffffff810007c0 __traceiter_initcall_start
  ffffffff81000810 __traceiter_initcall_finish
  ffffffff81000860 trace_initcall_finish_cb
  ...

Note displayed address is the patch-site address and can differ from
/proc/kallsyms address.

It's useful to have address avilable for traceable symbols, so we don't
need to allways cross check kallsyms with available_filter_functions
(or the other way around) and have all the data in single file.

For backwards compatibility reasons we can't change the existing
available_filter_functions file output, but we need to add new file.

Suggested-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 Documentation/trace/ftrace.rst |  6 ++++++
 include/linux/ftrace.h         |  1 +
 kernel/trace/ftrace.c          | 37 ++++++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+)

v2 changes:
  - simplified address print [Steven]
  - added doc entry for the new file
  

Comments

Yonghong Song June 12, 2023, 12:22 a.m. UTC | #1
On 6/11/23 6:00 AM, Jiri Olsa wrote:
> Adding new available_filter_functions_addrs file that shows all available
> functions (same as available_filter_functions) together with addresses,
> like:
> 
>    # cat available_filter_functions_addrs | head
>    ffffffff81000770 __traceiter_initcall_level
>    ffffffff810007c0 __traceiter_initcall_start
>    ffffffff81000810 __traceiter_initcall_finish
>    ffffffff81000860 trace_initcall_finish_cb
>    ...
> 
> Note displayed address is the patch-site address and can differ from
> /proc/kallsyms address.

Could you explain how these addresses will be used in kernel, esp.
since these addresses are different from kallsyms addresses?

Also, if there are multiple same static functions with
different addresses, user space might need to check dwarf or
proposed BTF_KIND_KFUNC (which encode kallsyms addresses)
to find whether entry in available_filter_functions_addrs
to be used. But addresses may not match. How this issue could
be resolved?

> 
> It's useful to have address avilable for traceable symbols, so we don't
> need to allways cross check kallsyms with available_filter_functions
> (or the other way around) and have all the data in single file.
> 
> For backwards compatibility reasons we can't change the existing
> available_filter_functions file output, but we need to add new file.
> 
> Suggested-by: Steven Rostedt (Google) <rostedt@goodmis.org>
> Suggested-by: Andrii Nakryiko <andrii@kernel.org>
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---
>   Documentation/trace/ftrace.rst |  6 ++++++
>   include/linux/ftrace.h         |  1 +
>   kernel/trace/ftrace.c          | 37 ++++++++++++++++++++++++++++++++++
>   3 files changed, 44 insertions(+)
> 
> v2 changes:
>    - simplified address print [Steven]
>    - added doc entry for the new file
> 
> diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst
> index 027437b745a0..e97573e3fc4a 100644
> --- a/Documentation/trace/ftrace.rst
> +++ b/Documentation/trace/ftrace.rst
> @@ -324,6 +324,12 @@ of ftrace. Here is a list of some of the key files:
>   	"set_graph_function", or "set_graph_notrace".
>   	(See the section "dynamic ftrace" below for more details.)
>   
> +  available_filter_functions_addrs:
> +
> +	Similar to available_filter_functions, but with address displayed
> +	for each function. The displayed address is the patch-site address
> +	and can differ from /proc/kallsyms address.
> +
>     dyn_ftrace_total_info:
>   
>   	This file is for debugging purposes. The number of functions that
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index b23bdd414394..6e372575a8e9 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -633,6 +633,7 @@ enum {
>   	FTRACE_ITER_MOD		= (1 << 5),
>   	FTRACE_ITER_ENABLED	= (1 << 6),
>   	FTRACE_ITER_TOUCHED	= (1 << 7),
> +	FTRACE_ITER_ADDRS	= (1 << 8),
>   };
>   
>   void arch_ftrace_update_code(int command);
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index 764668467155..b24c573934af 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -3861,6 +3861,9 @@ static int t_show(struct seq_file *m, void *v)
>   	if (!rec)
>   		return 0;
>   
> +	if (iter->flags & FTRACE_ITER_ADDRS)
> +		seq_printf(m, "%lx ", rec->ip);
> +
>   	if (print_rec(m, rec->ip)) {
>   		/* This should only happen when a rec is disabled */
>   		WARN_ON_ONCE(!(rec->flags & FTRACE_FL_DISABLED));
> @@ -3996,6 +3999,30 @@ ftrace_touched_open(struct inode *inode, struct file *file)
>   	return 0;
>   }
>   
> +static int
> +ftrace_avail_addrs_open(struct inode *inode, struct file *file)
> +{
> +	struct ftrace_iterator *iter;
> +	int ret;
> +
> +	ret = security_locked_down(LOCKDOWN_TRACEFS);
> +	if (ret)
> +		return ret;
> +
> +	if (unlikely(ftrace_disabled))
> +		return -ENODEV;
> +
> +	iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter));
> +	if (!iter)
> +		return -ENOMEM;
> +
> +	iter->pg = ftrace_pages_start;
> +	iter->flags = FTRACE_ITER_ADDRS;
> +	iter->ops = &global_ops;
> +
> +	return 0;
> +}
> +
>   /**
>    * ftrace_regex_open - initialize function tracer filter files
>    * @ops: The ftrace_ops that hold the hash filters
> @@ -5916,6 +5943,13 @@ static const struct file_operations ftrace_touched_fops = {
>   	.release = seq_release_private,
>   };
>   
> +static const struct file_operations ftrace_avail_addrs_fops = {
> +	.open = ftrace_avail_addrs_open,
> +	.read = seq_read,
> +	.llseek = seq_lseek,
> +	.release = seq_release_private,
> +};
> +
>   static const struct file_operations ftrace_filter_fops = {
>   	.open = ftrace_filter_open,
>   	.read = seq_read,
> @@ -6377,6 +6411,9 @@ static __init int ftrace_init_dyn_tracefs(struct dentry *d_tracer)
>   	trace_create_file("available_filter_functions", TRACE_MODE_READ,
>   			d_tracer, NULL, &ftrace_avail_fops);
>   
> +	trace_create_file("available_filter_functions_addrs", TRACE_MODE_READ,
> +			d_tracer, NULL, &ftrace_avail_addrs_fops);
> +
>   	trace_create_file("enabled_functions", TRACE_MODE_READ,
>   			d_tracer, NULL, &ftrace_enabled_fops);
>
  
Steven Rostedt June 12, 2023, 2:54 a.m. UTC | #2
On Sun, 11 Jun 2023 17:22:29 -0700
Yonghong Song <yhs@meta.com> wrote:

> > Note displayed address is the patch-site address and can differ from
> > /proc/kallsyms address.  
> 
> Could you explain how these addresses will be used in kernel, esp.
> since these addresses are different from kallsyms addresses?

These are the addresses that will be patched by the ftrace infrastructure.
It's more efficient to use these addresses when setting a function filter
with ftrace_set_filter_ip(), as it doesn't need to call into the kallsyms
infrastructure to look to see if the passed in ip lands on a function. That
is, if rec->ip matches the ip, it uses it. If you look at ftrace_location()
(which is used to find the record to be patched), it first does a search of
these addresses. If it doesn't find it, it then has to call into kallsyms
to find the start and end of the function, and do another search into that
range.

> 
> Also, if there are multiple same static functions with
> different addresses, user space might need to check dwarf or
> proposed BTF_KIND_KFUNC (which encode kallsyms addresses)
> to find whether entry in available_filter_functions_addrs
> to be used. But addresses may not match. How this issue could
> be resolved?

Easy, you use the address between two other addresses in kallsyms. The
address is still in the function. The addresses in kallsyms is the starting
address, but there's cases that the patch location is not at the start.

-- Steve
  
Steven Rostedt June 12, 2023, 2:57 a.m. UTC | #3
On Sun, 11 Jun 2023 22:54:07 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> > Also, if there are multiple same static functions with
> > different addresses, user space might need to check dwarf or
> > proposed BTF_KIND_KFUNC (which encode kallsyms addresses)
> > to find whether entry in available_filter_functions_addrs
> > to be used. But addresses may not match. How this issue could
> > be resolved?  
> 
> Easy, you use the address between two other addresses in kallsyms. The
> address is still in the function. The addresses in kallsyms is the starting
> address, but there's cases that the patch location is not at the start.

Not to mention, you can still use the kallsyms address. If you did the work
to find it, then use it (it may not be as efficient as I mentioned before).
That's basically what is done today (so I am told), and this patch was to
create a file where you don't need to look up kallsyms when you know which
function to use. The functions are sorted by address, so if you know of a
unique function near the duplicate, you just find the duplicate that's near
the unique function name.

-- Steve
  
Yonghong Song June 12, 2023, 2:49 p.m. UTC | #4
On 6/11/23 7:57 PM, Steven Rostedt wrote:
> On Sun, 11 Jun 2023 22:54:07 -0400
> Steven Rostedt <rostedt@goodmis.org> wrote:
> 
>>> Also, if there are multiple same static functions with
>>> different addresses, user space might need to check dwarf or
>>> proposed BTF_KIND_KFUNC (which encode kallsyms addresses)
>>> to find whether entry in available_filter_functions_addrs
>>> to be used. But addresses may not match. How this issue could
>>> be resolved?
>>
>> Easy, you use the address between two other addresses in kallsyms. The
>> address is still in the function. The addresses in kallsyms is the starting
>> address, but there's cases that the patch location is not at the start.
> 
> Not to mention, you can still use the kallsyms address. If you did the work
> to find it, then use it (it may not be as efficient as I mentioned before).
> That's basically what is done today (so I am told), and this patch was to
> create a file where you don't need to look up kallsyms when you know which
> function to use. The functions are sorted by address, so if you know of a
> unique function near the duplicate, you just find the duplicate that's near
> the unique function name.

Thanks for explanation. IIUC, typically with endbr enabled, the patch
address typically is the entry_addr + 4. So it is indeed easy to
correlate the entry in available_filter_functions_addrs and in
kallsyms.

I am actually interested in how available_filter_functions_addrs
will be used. For example, bpf_program__attach_kprobe_multi_opts()
can already take addresses from kallsyms. How to use
available_filter_functions_addrs to facilitate kprobe_multi?
Do we need to change kernel APIs? It would be great at least we
got a RFC patch to answer these questions.

> 
> -- Steve
  
Steven Rostedt June 12, 2023, 3:02 p.m. UTC | #5
On Mon, 12 Jun 2023 07:49:53 -0700
Yonghong Song <yhs@meta.com> wrote:

> I am actually interested in how available_filter_functions_addrs
> will be used. For example, bpf_program__attach_kprobe_multi_opts()
> can already take addresses from kallsyms. How to use
> available_filter_functions_addrs to facilitate kprobe_multi?
> Do we need to change kernel APIs? It would be great at least we
> got a RFC patch to answer these questions.

I agree, having that information would also be useful to me.

Jiri? Andrii?

-- Steve
  
Jiri Olsa June 12, 2023, 8:25 p.m. UTC | #6
On Mon, Jun 12, 2023 at 11:02:22AM -0400, Steven Rostedt wrote:
> On Mon, 12 Jun 2023 07:49:53 -0700
> Yonghong Song <yhs@meta.com> wrote:
> 
> > I am actually interested in how available_filter_functions_addrs
> > will be used. For example, bpf_program__attach_kprobe_multi_opts()
> > can already take addresses from kallsyms. How to use
> > available_filter_functions_addrs to facilitate kprobe_multi?

the problem is that we need to do 2 passes:

 - through available_filter_functions and find out if the function is traceable
 - through /proc/kallsyms to get the address for traceable function

having available_filter_functions symbols together with addresses allow
us to skip the kallsyms step

and we are ok with the address in available_filter_functions_addr not being the
function entry, because kprobe_multi uses fprobe and that handles both entry and
patch-site address properly

> > Do we need to change kernel APIs? It would be great at least we
> > got a RFC patch to answer these questions.
> 
> I agree, having that information would also be useful to me.
> 
> Jiri? Andrii?

so we have 2 interfaces how to create kprobe_multi link:

  a) passing symbols to kernel

     1) user gathers symbols and need to ensure that they are
        trace-able -> pass through available_filter_functions file

     2) kernel takes those symbols and translates them to addresses
        through kallsyms api

     3) addresses are passed to fprobe/ftrace through:

         register_fprobe_ips
         -> ftrace_set_filter_ips

  b) passing addresses to kernel

     1) user gathers symbols and needs to ensure that they are
        trace-able -> pass through available_filter_functions file

     2) user takes those symbols and translates them to addresses
       through /proc/kallsyms

     3) addresses are passed to the kernel and kernel calls:

         register_fprobe_ips
         -> ftrace_set_filter_ips


The new available_filter_functions_addrs file helps us with option b),
because we can make 'b 1' and 'b 2' in one step - while filtering traceable
functions, we get the address directly.

I tested the new available_filter_functions_addrs changes with some hacked
selftest changes, you can check it in here [1].

I assume Jackie Liu will send new version of her patchset [2] based on this
new available_filter_functions_addrs file.

I think we should have these changes coming together and add some perf
measurements from before and after to make the benefit apparent.

jirka


[1] https://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git/commit/?h=bpf/avail_addrs&id=fecaeeaf40bae034715ab2e9a46ca1dc16371e8e
[2] https://lore.kernel.org/bpf/20230526155026.1419390-1-liu.yun@linux.dev/#r
  
Andrii Nakryiko June 12, 2023, 11:28 p.m. UTC | #7
On Mon, Jun 12, 2023 at 1:25 PM Jiri Olsa <olsajiri@gmail.com> wrote:
>
> On Mon, Jun 12, 2023 at 11:02:22AM -0400, Steven Rostedt wrote:
> > On Mon, 12 Jun 2023 07:49:53 -0700
> > Yonghong Song <yhs@meta.com> wrote:
> >
> > > I am actually interested in how available_filter_functions_addrs
> > > will be used. For example, bpf_program__attach_kprobe_multi_opts()
> > > can already take addresses from kallsyms. How to use
> > > available_filter_functions_addrs to facilitate kprobe_multi?
>
> the problem is that we need to do 2 passes:
>
>  - through available_filter_functions and find out if the function is traceable
>  - through /proc/kallsyms to get the address for traceable function
>
> having available_filter_functions symbols together with addresses allow
> us to skip the kallsyms step
>
> and we are ok with the address in available_filter_functions_addr not being the
> function entry, because kprobe_multi uses fprobe and that handles both entry and
> patch-site address properly
>
> > > Do we need to change kernel APIs? It would be great at least we
> > > got a RFC patch to answer these questions.
> >
> > I agree, having that information would also be useful to me.
> >
> > Jiri? Andrii?
>
> so we have 2 interfaces how to create kprobe_multi link:
>
>   a) passing symbols to kernel
>
>      1) user gathers symbols and need to ensure that they are
>         trace-able -> pass through available_filter_functions file
>
>      2) kernel takes those symbols and translates them to addresses
>         through kallsyms api
>
>      3) addresses are passed to fprobe/ftrace through:
>
>          register_fprobe_ips
>          -> ftrace_set_filter_ips
>
>   b) passing addresses to kernel
>
>      1) user gathers symbols and needs to ensure that they are
>         trace-able -> pass through available_filter_functions file
>
>      2) user takes those symbols and translates them to addresses
>        through /proc/kallsyms
>
>      3) addresses are passed to the kernel and kernel calls:
>
>          register_fprobe_ips
>          -> ftrace_set_filter_ips
>
>
> The new available_filter_functions_addrs file helps us with option b),
> because we can make 'b 1' and 'b 2' in one step - while filtering traceable
> functions, we get the address directly.
>
> I tested the new available_filter_functions_addrs changes with some hacked
> selftest changes, you can check it in here [1].
>
> I assume Jackie Liu will send new version of her patchset [2] based on this
> new available_filter_functions_addrs file.
>
> I think we should have these changes coming together and add some perf
> measurements from before and after to make the benefit apparent.
>

If Steven would be ok with it, can we land this change through the
bpf-next tree? Then we can have BPF selftest added in the same patch
set that parses a new file and uses bpf_program__attach_kprobe_multi()
to attach using explicit addresses.

This should make it clear to everyone how this is meant to be used and
will be a good test that everything works end-to-end.

> jirka
>
>
> [1] https://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git/commit/?h=bpf/avail_addrs&id=fecaeeaf40bae034715ab2e9a46ca1dc16371e8e
> [2] https://lore.kernel.org/bpf/20230526155026.1419390-1-liu.yun@linux.dev/#r
  
Steven Rostedt June 12, 2023, 11:31 p.m. UTC | #8
On Mon, 12 Jun 2023 16:28:49 -0700
Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:


> If Steven would be ok with it, can we land this change through the
> bpf-next tree? Then we can have BPF selftest added in the same patch
> set that parses a new file and uses bpf_program__attach_kprobe_multi()
> to attach using explicit addresses.
> 
> This should make it clear to everyone how this is meant to be used and
> will be a good test that everything works end-to-end.
> 

This touches some of the code I'm working with, so I rather have it be in
my tree.

-- Steve
  
Jackie Liu June 13, 2023, 1:22 a.m. UTC | #9
在 2023/6/13 04:25, Jiri Olsa 写道:
> On Mon, Jun 12, 2023 at 11:02:22AM -0400, Steven Rostedt wrote:
>> On Mon, 12 Jun 2023 07:49:53 -0700
>> Yonghong Song <yhs@meta.com> wrote:
>>
>>> I am actually interested in how available_filter_functions_addrs
>>> will be used. For example, bpf_program__attach_kprobe_multi_opts()
>>> can already take addresses from kallsyms. How to use
>>> available_filter_functions_addrs to facilitate kprobe_multi?
> 
> the problem is that we need to do 2 passes:
> 
>   - through available_filter_functions and find out if the function is traceable
>   - through /proc/kallsyms to get the address for traceable function
> 
> having available_filter_functions symbols together with addresses allow
> us to skip the kallsyms step
> 
> and we are ok with the address in available_filter_functions_addr not being the
> function entry, because kprobe_multi uses fprobe and that handles both entry and
> patch-site address properly
> 
>>> Do we need to change kernel APIs? It would be great at least we
>>> got a RFC patch to answer these questions.
>>
>> I agree, having that information would also be useful to me.
>>
>> Jiri? Andrii?
> 
> so we have 2 interfaces how to create kprobe_multi link:
> 
>    a) passing symbols to kernel
> 
>       1) user gathers symbols and need to ensure that they are
>          trace-able -> pass through available_filter_functions file
> 
>       2) kernel takes those symbols and translates them to addresses
>          through kallsyms api
> 
>       3) addresses are passed to fprobe/ftrace through:
> 
>           register_fprobe_ips
>           -> ftrace_set_filter_ips
> 
>    b) passing addresses to kernel
> 
>       1) user gathers symbols and needs to ensure that they are
>          trace-able -> pass through available_filter_functions file
> 
>       2) user takes those symbols and translates them to addresses
>         through /proc/kallsyms
> 
>       3) addresses are passed to the kernel and kernel calls:
> 
>           register_fprobe_ips
>           -> ftrace_set_filter_ips
> 
> 
> The new available_filter_functions_addrs file helps us with option b),
> because we can make 'b 1' and 'b 2' in one step - while filtering traceable
> functions, we get the address directly.
> 
> I tested the new available_filter_functions_addrs changes with some hacked
> selftest changes, you can check it in here [1].
> 
> I assume Jackie Liu will send new version of her patchset [2] based on this
> new available_filter_functions_addrs file.

Yes, once the new interface is released, I will release a v2 version 
patch based on it.
  
Yonghong Song June 13, 2023, 5:04 a.m. UTC | #10
On 6/12/23 1:25 PM, Jiri Olsa wrote:
> On Mon, Jun 12, 2023 at 11:02:22AM -0400, Steven Rostedt wrote:
>> On Mon, 12 Jun 2023 07:49:53 -0700
>> Yonghong Song <yhs@meta.com> wrote:
>>
>>> I am actually interested in how available_filter_functions_addrs
>>> will be used. For example, bpf_program__attach_kprobe_multi_opts()
>>> can already take addresses from kallsyms. How to use
>>> available_filter_functions_addrs to facilitate kprobe_multi?
> 
> the problem is that we need to do 2 passes:
> 
>   - through available_filter_functions and find out if the function is traceable
>   - through /proc/kallsyms to get the address for traceable function
> 
> having available_filter_functions symbols together with addresses allow
> us to skip the kallsyms step
> 
> and we are ok with the address in available_filter_functions_addr not being the
> function entry, because kprobe_multi uses fprobe and that handles both entry and
> patch-site address properly

Thanks for explanation! It would be great if we can put more details in
this email into the commit message!

> 
>>> Do we need to change kernel APIs? It would be great at least we
>>> got a RFC patch to answer these questions.
>>
>> I agree, having that information would also be useful to me.
>>
>> Jiri? Andrii?
> 
> so we have 2 interfaces how to create kprobe_multi link:
> 
>    a) passing symbols to kernel
> 
>       1) user gathers symbols and need to ensure that they are
>          trace-able -> pass through available_filter_functions file
> 
>       2) kernel takes those symbols and translates them to addresses
>          through kallsyms api
> 
>       3) addresses are passed to fprobe/ftrace through:
> 
>           register_fprobe_ips
>           -> ftrace_set_filter_ips
> 
>    b) passing addresses to kernel
> 
>       1) user gathers symbols and needs to ensure that they are
>          trace-able -> pass through available_filter_functions file
> 
>       2) user takes those symbols and translates them to addresses
>         through /proc/kallsyms
> 
>       3) addresses are passed to the kernel and kernel calls:
> 
>           register_fprobe_ips
>           -> ftrace_set_filter_ips
> 
> 
> The new available_filter_functions_addrs file helps us with option b),
> because we can make 'b 1' and 'b 2' in one step - while filtering traceable
> functions, we get the address directly.
> 
> I tested the new available_filter_functions_addrs changes with some hacked
> selftest changes, you can check it in here [1].
> 
> I assume Jackie Liu will send new version of her patchset [2] based on this
> new available_filter_functions_addrs file.
> 
> I think we should have these changes coming together and add some perf
> measurements from before and after to make the benefit apparent.
> 
> jirka
> 
> 
> [1] https://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git/commit/?h=bpf/avail_addrs&id=fecaeeaf40bae034715ab2e9a46ca1dc16371e8e
> [2] https://lore.kernel.org/bpf/20230526155026.1419390-1-liu.yun@linux.dev/#r
  
Steven Rostedt June 13, 2023, 1:36 p.m. UTC | #11
On Mon, 12 Jun 2023 22:04:28 -0700
Yonghong Song <yhs@meta.com> wrote:

> Thanks for explanation! It would be great if we can put more details in
> this email into the commit message!

I agree.

This is the patch I just pulled into my queue:

From: Jiri Olsa <jolsa@kernel.org>
Date: Sun, 11 Jun 2023 15:00:29 +0200
Subject: [PATCH] ftrace: Show all functions with addresses in
 available_filter_functions_addrs

Adding new available_filter_functions_addrs file that shows all available
functions (same as available_filter_functions) together with addresses,
like:

  # cat available_filter_functions_addrs | head
  ffffffff81000770 __traceiter_initcall_level
  ffffffff810007c0 __traceiter_initcall_start
  ffffffff81000810 __traceiter_initcall_finish
  ffffffff81000860 trace_initcall_finish_cb
  ...

Note displayed address is the patch-site address and can differ from
/proc/kallsyms address.

It's useful to have address avilable for traceable symbols, so we don't
need to allways cross check kallsyms with available_filter_functions
(or the other way around) and have all the data in single file.

For backwards compatibility reasons we can't change the existing
available_filter_functions file output, but we need to add new file.

The problem is that we need to do 2 passes:

 - through available_filter_functions and find out if the function is traceable
 - through /proc/kallsyms to get the address for traceable function

Having available_filter_functions symbols together with addresses allow
us to skip the kallsyms step and we are ok with the address in
available_filter_functions_addr not being the function entry, because
kprobe_multi uses fprobe and that handles both entry and patch-site
address properly.

We have 2 interfaces how to create kprobe_multi link:

  a) passing symbols to kernel

     1) user gathers symbols and need to ensure that they are
        trace-able -> pass through available_filter_functions file

     2) kernel takes those symbols and translates them to addresses
        through kallsyms api

     3) addresses are passed to fprobe/ftrace through:

         register_fprobe_ips
         -> ftrace_set_filter_ips

  b) passing addresses to kernel

     1) user gathers symbols and needs to ensure that they are
        trace-able -> pass through available_filter_functions file

     2) user takes those symbols and translates them to addresses
       through /proc/kallsyms

     3) addresses are passed to the kernel and kernel calls:

         register_fprobe_ips
         -> ftrace_set_filter_ips

The new available_filter_functions_addrs file helps us with option b),
because we can make 'b 1' and 'b 2' in one step - while filtering traceable
functions, we get the address directly.

Link: https://lore.kernel.org/linux-trace-kernel/20230611130029.1202298-1-jolsa@kernel.org

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: Jackie Liu <liu.yun@linux.dev>
Suggested-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 Documentation/trace/ftrace.rst |  6 ++++++
 include/linux/ftrace.h         |  1 +
 kernel/trace/ftrace.c          | 37 ++++++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+)

diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst
index df2d3e57a83f..b7308ab10c0e 100644
--- a/Documentation/trace/ftrace.rst
+++ b/Documentation/trace/ftrace.rst
@@ -324,6 +324,12 @@ of ftrace. Here is a list of some of the key files:
 	"set_graph_function", or "set_graph_notrace".
 	(See the section "dynamic ftrace" below for more details.)
 
+  available_filter_functions_addrs:
+
+	Similar to available_filter_functions, but with address displayed
+	for each function. The displayed address is the patch-site address
+	and can differ from /proc/kallsyms address.
+
   dyn_ftrace_total_info:
 
 	This file is for debugging purposes. The number of functions that
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 49f279f4c3a1..8e59bd954153 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -633,6 +633,7 @@ enum {
 	FTRACE_ITER_MOD		= (1 << 5),
 	FTRACE_ITER_ENABLED	= (1 << 6),
 	FTRACE_ITER_TOUCHED	= (1 << 7),
+	FTRACE_ITER_ADDRS	= (1 << 8),
 };
 
 void arch_ftrace_update_code(int command);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 764668467155..b24c573934af 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3861,6 +3861,9 @@ static int t_show(struct seq_file *m, void *v)
 	if (!rec)
 		return 0;
 
+	if (iter->flags & FTRACE_ITER_ADDRS)
+		seq_printf(m, "%lx ", rec->ip);
+
 	if (print_rec(m, rec->ip)) {
 		/* This should only happen when a rec is disabled */
 		WARN_ON_ONCE(!(rec->flags & FTRACE_FL_DISABLED));
@@ -3996,6 +3999,30 @@ ftrace_touched_open(struct inode *inode, struct file *file)
 	return 0;
 }
 
+static int
+ftrace_avail_addrs_open(struct inode *inode, struct file *file)
+{
+	struct ftrace_iterator *iter;
+	int ret;
+
+	ret = security_locked_down(LOCKDOWN_TRACEFS);
+	if (ret)
+		return ret;
+
+	if (unlikely(ftrace_disabled))
+		return -ENODEV;
+
+	iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter));
+	if (!iter)
+		return -ENOMEM;
+
+	iter->pg = ftrace_pages_start;
+	iter->flags = FTRACE_ITER_ADDRS;
+	iter->ops = &global_ops;
+
+	return 0;
+}
+
 /**
  * ftrace_regex_open - initialize function tracer filter files
  * @ops: The ftrace_ops that hold the hash filters
@@ -5916,6 +5943,13 @@ static const struct file_operations ftrace_touched_fops = {
 	.release = seq_release_private,
 };
 
+static const struct file_operations ftrace_avail_addrs_fops = {
+	.open = ftrace_avail_addrs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release_private,
+};
+
 static const struct file_operations ftrace_filter_fops = {
 	.open = ftrace_filter_open,
 	.read = seq_read,
@@ -6377,6 +6411,9 @@ static __init int ftrace_init_dyn_tracefs(struct dentry *d_tracer)
 	trace_create_file("available_filter_functions", TRACE_MODE_READ,
 			d_tracer, NULL, &ftrace_avail_fops);
 
+	trace_create_file("available_filter_functions_addrs", TRACE_MODE_READ,
+			d_tracer, NULL, &ftrace_avail_addrs_fops);
+
 	trace_create_file("enabled_functions", TRACE_MODE_READ,
 			d_tracer, NULL, &ftrace_enabled_fops);
  
Jiri Olsa June 13, 2023, 1:44 p.m. UTC | #12
On Tue, Jun 13, 2023 at 09:36:06AM -0400, Steven Rostedt wrote:
> On Mon, 12 Jun 2023 22:04:28 -0700
> Yonghong Song <yhs@meta.com> wrote:
> 
> > Thanks for explanation! It would be great if we can put more details in
> > this email into the commit message!
> 
> I agree.
> 
> This is the patch I just pulled into my queue:

great, thanks

jirka

> 
> From: Jiri Olsa <jolsa@kernel.org>
> Date: Sun, 11 Jun 2023 15:00:29 +0200
> Subject: [PATCH] ftrace: Show all functions with addresses in
>  available_filter_functions_addrs
> 
> Adding new available_filter_functions_addrs file that shows all available
> functions (same as available_filter_functions) together with addresses,
> like:
> 
>   # cat available_filter_functions_addrs | head
>   ffffffff81000770 __traceiter_initcall_level
>   ffffffff810007c0 __traceiter_initcall_start
>   ffffffff81000810 __traceiter_initcall_finish
>   ffffffff81000860 trace_initcall_finish_cb
>   ...
> 
> Note displayed address is the patch-site address and can differ from
> /proc/kallsyms address.
> 
> It's useful to have address avilable for traceable symbols, so we don't
> need to allways cross check kallsyms with available_filter_functions
> (or the other way around) and have all the data in single file.
> 
> For backwards compatibility reasons we can't change the existing
> available_filter_functions file output, but we need to add new file.
> 
> The problem is that we need to do 2 passes:
> 
>  - through available_filter_functions and find out if the function is traceable
>  - through /proc/kallsyms to get the address for traceable function
> 
> Having available_filter_functions symbols together with addresses allow
> us to skip the kallsyms step and we are ok with the address in
> available_filter_functions_addr not being the function entry, because
> kprobe_multi uses fprobe and that handles both entry and patch-site
> address properly.
> 
> We have 2 interfaces how to create kprobe_multi link:
> 
>   a) passing symbols to kernel
> 
>      1) user gathers symbols and need to ensure that they are
>         trace-able -> pass through available_filter_functions file
> 
>      2) kernel takes those symbols and translates them to addresses
>         through kallsyms api
> 
>      3) addresses are passed to fprobe/ftrace through:
> 
>          register_fprobe_ips
>          -> ftrace_set_filter_ips
> 
>   b) passing addresses to kernel
> 
>      1) user gathers symbols and needs to ensure that they are
>         trace-able -> pass through available_filter_functions file
> 
>      2) user takes those symbols and translates them to addresses
>        through /proc/kallsyms
> 
>      3) addresses are passed to the kernel and kernel calls:
> 
>          register_fprobe_ips
>          -> ftrace_set_filter_ips
> 
> The new available_filter_functions_addrs file helps us with option b),
> because we can make 'b 1' and 'b 2' in one step - while filtering traceable
> functions, we get the address directly.
> 
> Link: https://lore.kernel.org/linux-trace-kernel/20230611130029.1202298-1-jolsa@kernel.org
> 
> Cc: Masami Hiramatsu <mhiramat@kernel.org>
> Cc: Mark Rutland <mark.rutland@arm.com>
> Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
> Cc: Jackie Liu <liu.yun@linux.dev>
> Suggested-by: Steven Rostedt (Google) <rostedt@goodmis.org>
> Suggested-by: Andrii Nakryiko <andrii@kernel.org>
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
> ---
>  Documentation/trace/ftrace.rst |  6 ++++++
>  include/linux/ftrace.h         |  1 +
>  kernel/trace/ftrace.c          | 37 ++++++++++++++++++++++++++++++++++
>  3 files changed, 44 insertions(+)
> 
> diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst
> index df2d3e57a83f..b7308ab10c0e 100644
> --- a/Documentation/trace/ftrace.rst
> +++ b/Documentation/trace/ftrace.rst
> @@ -324,6 +324,12 @@ of ftrace. Here is a list of some of the key files:
>  	"set_graph_function", or "set_graph_notrace".
>  	(See the section "dynamic ftrace" below for more details.)
>  
> +  available_filter_functions_addrs:
> +
> +	Similar to available_filter_functions, but with address displayed
> +	for each function. The displayed address is the patch-site address
> +	and can differ from /proc/kallsyms address.
> +
>    dyn_ftrace_total_info:
>  
>  	This file is for debugging purposes. The number of functions that
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index 49f279f4c3a1..8e59bd954153 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -633,6 +633,7 @@ enum {
>  	FTRACE_ITER_MOD		= (1 << 5),
>  	FTRACE_ITER_ENABLED	= (1 << 6),
>  	FTRACE_ITER_TOUCHED	= (1 << 7),
> +	FTRACE_ITER_ADDRS	= (1 << 8),
>  };
>  
>  void arch_ftrace_update_code(int command);
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index 764668467155..b24c573934af 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -3861,6 +3861,9 @@ static int t_show(struct seq_file *m, void *v)
>  	if (!rec)
>  		return 0;
>  
> +	if (iter->flags & FTRACE_ITER_ADDRS)
> +		seq_printf(m, "%lx ", rec->ip);
> +
>  	if (print_rec(m, rec->ip)) {
>  		/* This should only happen when a rec is disabled */
>  		WARN_ON_ONCE(!(rec->flags & FTRACE_FL_DISABLED));
> @@ -3996,6 +3999,30 @@ ftrace_touched_open(struct inode *inode, struct file *file)
>  	return 0;
>  }
>  
> +static int
> +ftrace_avail_addrs_open(struct inode *inode, struct file *file)
> +{
> +	struct ftrace_iterator *iter;
> +	int ret;
> +
> +	ret = security_locked_down(LOCKDOWN_TRACEFS);
> +	if (ret)
> +		return ret;
> +
> +	if (unlikely(ftrace_disabled))
> +		return -ENODEV;
> +
> +	iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter));
> +	if (!iter)
> +		return -ENOMEM;
> +
> +	iter->pg = ftrace_pages_start;
> +	iter->flags = FTRACE_ITER_ADDRS;
> +	iter->ops = &global_ops;
> +
> +	return 0;
> +}
> +
>  /**
>   * ftrace_regex_open - initialize function tracer filter files
>   * @ops: The ftrace_ops that hold the hash filters
> @@ -5916,6 +5943,13 @@ static const struct file_operations ftrace_touched_fops = {
>  	.release = seq_release_private,
>  };
>  
> +static const struct file_operations ftrace_avail_addrs_fops = {
> +	.open = ftrace_avail_addrs_open,
> +	.read = seq_read,
> +	.llseek = seq_lseek,
> +	.release = seq_release_private,
> +};
> +
>  static const struct file_operations ftrace_filter_fops = {
>  	.open = ftrace_filter_open,
>  	.read = seq_read,
> @@ -6377,6 +6411,9 @@ static __init int ftrace_init_dyn_tracefs(struct dentry *d_tracer)
>  	trace_create_file("available_filter_functions", TRACE_MODE_READ,
>  			d_tracer, NULL, &ftrace_avail_fops);
>  
> +	trace_create_file("available_filter_functions_addrs", TRACE_MODE_READ,
> +			d_tracer, NULL, &ftrace_avail_addrs_fops);
> +
>  	trace_create_file("enabled_functions", TRACE_MODE_READ,
>  			d_tracer, NULL, &ftrace_enabled_fops);
>  
> -- 
> 2.39.2
>
  
Jiri Olsa June 14, 2023, 2:14 p.m. UTC | #13
On Mon, Jun 12, 2023 at 10:25:55PM +0200, Jiri Olsa wrote:
> On Mon, Jun 12, 2023 at 11:02:22AM -0400, Steven Rostedt wrote:
> > On Mon, 12 Jun 2023 07:49:53 -0700
> > Yonghong Song <yhs@meta.com> wrote:
> > 
> > > I am actually interested in how available_filter_functions_addrs
> > > will be used. For example, bpf_program__attach_kprobe_multi_opts()
> > > can already take addresses from kallsyms. How to use
> > > available_filter_functions_addrs to facilitate kprobe_multi?
> 
> the problem is that we need to do 2 passes:
> 
>  - through available_filter_functions and find out if the function is traceable
>  - through /proc/kallsyms to get the address for traceable function
> 
> having available_filter_functions symbols together with addresses allow
> us to skip the kallsyms step
> 
> and we are ok with the address in available_filter_functions_addr not being the
> function entry, because kprobe_multi uses fprobe and that handles both entry and
> patch-site address properly
> 
> > > Do we need to change kernel APIs? It would be great at least we
> > > got a RFC patch to answer these questions.
> > 
> > I agree, having that information would also be useful to me.
> > 
> > Jiri? Andrii?
> 
> so we have 2 interfaces how to create kprobe_multi link:
> 
>   a) passing symbols to kernel
> 
>      1) user gathers symbols and need to ensure that they are
>         trace-able -> pass through available_filter_functions file
> 
>      2) kernel takes those symbols and translates them to addresses
>         through kallsyms api
> 
>      3) addresses are passed to fprobe/ftrace through:
> 
>          register_fprobe_ips
>          -> ftrace_set_filter_ips
> 
>   b) passing addresses to kernel
> 
>      1) user gathers symbols and needs to ensure that they are
>         trace-able -> pass through available_filter_functions file
> 
>      2) user takes those symbols and translates them to addresses
>        through /proc/kallsyms
> 
>      3) addresses are passed to the kernel and kernel calls:
> 
>          register_fprobe_ips
>          -> ftrace_set_filter_ips
> 
> 
> The new available_filter_functions_addrs file helps us with option b),
> because we can make 'b 1' and 'b 2' in one step - while filtering traceable
> functions, we get the address directly.
> 
> I tested the new available_filter_functions_addrs changes with some hacked
> selftest changes, you can check it in here [1].
> 
> I assume Jackie Liu will send new version of her patchset [2] based on this
> new available_filter_functions_addrs file.
> 
> I think we should have these changes coming together and add some perf
> measurements from before and after to make the benefit apparent.

FYI I did some perf meassurements and the speedup is not substantial :-\

looks like the symbols resolving to addresses we do in kernel for kprobe_multi
link is more faster/cheaper than I thought 

but still there is 'some' speedup and we will get rid of the extra
/proc/kallsyms parsing, so I think it's still worth it to have the
new file


base:

 Performance counter stats for './test_progs -n 103/1':

       103,423,103      cycles:u
    79,279,231,029      cycles:k
    79,382,694,663      cycles

      18.627593589 seconds time elapsed

       0.025999000 seconds user
      18.323855000 seconds sys


with fix:

 Performance counter stats for './test_progs -n 103/1':

       126,659,572      cycles:u
    77,951,768,179      cycles:k
    78,078,467,451      cycles

      18.651464273 seconds time elapsed

       0.025001000 seconds user
      18.243828000 seconds sys


jirka
  
Steven Rostedt June 14, 2023, 3:12 p.m. UTC | #14
On Wed, 14 Jun 2023 16:14:19 +0200
Jiri Olsa <olsajiri@gmail.com> wrote:

> FYI I did some perf meassurements and the speedup is not substantial :-\
> 
> looks like the symbols resolving to addresses we do in kernel for kprobe_multi
> link is more faster/cheaper than I thought 

The symbol lookup is supposed to be fast, but it's not "free", whereas this
is "free". I didn't expect a big speedup.

-- Steve


> 
> but still there is 'some' speedup and we will get rid of the extra
> /proc/kallsyms parsing, so I think it's still worth it to have the
> new file
  

Patch

diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst
index 027437b745a0..e97573e3fc4a 100644
--- a/Documentation/trace/ftrace.rst
+++ b/Documentation/trace/ftrace.rst
@@ -324,6 +324,12 @@  of ftrace. Here is a list of some of the key files:
 	"set_graph_function", or "set_graph_notrace".
 	(See the section "dynamic ftrace" below for more details.)
 
+  available_filter_functions_addrs:
+
+	Similar to available_filter_functions, but with address displayed
+	for each function. The displayed address is the patch-site address
+	and can differ from /proc/kallsyms address.
+
   dyn_ftrace_total_info:
 
 	This file is for debugging purposes. The number of functions that
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index b23bdd414394..6e372575a8e9 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -633,6 +633,7 @@  enum {
 	FTRACE_ITER_MOD		= (1 << 5),
 	FTRACE_ITER_ENABLED	= (1 << 6),
 	FTRACE_ITER_TOUCHED	= (1 << 7),
+	FTRACE_ITER_ADDRS	= (1 << 8),
 };
 
 void arch_ftrace_update_code(int command);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 764668467155..b24c573934af 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3861,6 +3861,9 @@  static int t_show(struct seq_file *m, void *v)
 	if (!rec)
 		return 0;
 
+	if (iter->flags & FTRACE_ITER_ADDRS)
+		seq_printf(m, "%lx ", rec->ip);
+
 	if (print_rec(m, rec->ip)) {
 		/* This should only happen when a rec is disabled */
 		WARN_ON_ONCE(!(rec->flags & FTRACE_FL_DISABLED));
@@ -3996,6 +3999,30 @@  ftrace_touched_open(struct inode *inode, struct file *file)
 	return 0;
 }
 
+static int
+ftrace_avail_addrs_open(struct inode *inode, struct file *file)
+{
+	struct ftrace_iterator *iter;
+	int ret;
+
+	ret = security_locked_down(LOCKDOWN_TRACEFS);
+	if (ret)
+		return ret;
+
+	if (unlikely(ftrace_disabled))
+		return -ENODEV;
+
+	iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter));
+	if (!iter)
+		return -ENOMEM;
+
+	iter->pg = ftrace_pages_start;
+	iter->flags = FTRACE_ITER_ADDRS;
+	iter->ops = &global_ops;
+
+	return 0;
+}
+
 /**
  * ftrace_regex_open - initialize function tracer filter files
  * @ops: The ftrace_ops that hold the hash filters
@@ -5916,6 +5943,13 @@  static const struct file_operations ftrace_touched_fops = {
 	.release = seq_release_private,
 };
 
+static const struct file_operations ftrace_avail_addrs_fops = {
+	.open = ftrace_avail_addrs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release_private,
+};
+
 static const struct file_operations ftrace_filter_fops = {
 	.open = ftrace_filter_open,
 	.read = seq_read,
@@ -6377,6 +6411,9 @@  static __init int ftrace_init_dyn_tracefs(struct dentry *d_tracer)
 	trace_create_file("available_filter_functions", TRACE_MODE_READ,
 			d_tracer, NULL, &ftrace_avail_fops);
 
+	trace_create_file("available_filter_functions_addrs", TRACE_MODE_READ,
+			d_tracer, NULL, &ftrace_avail_addrs_fops);
+
 	trace_create_file("enabled_functions", TRACE_MODE_READ,
 			d_tracer, NULL, &ftrace_enabled_fops);