[RFC,02/10] perf: Remove get_perf_callchain() 'crosstask' argument

Message ID f2c5130322a419ad04ea328a22fc2908487a1e25.1699487758.git.jpoimboe@kernel.org
State New
Headers
Series perf: user space sframe unwinding |

Commit Message

Josh Poimboeuf Nov. 9, 2023, 12:41 a.m. UTC
  get_perf_callchain() doesn't support cross-task unwinding, so it doesn't
make much sense to have 'crosstask' as an argument.  Instead, have
perf_callchain() adjust 'user' accordingly.

Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
---
 include/linux/perf_event.h | 2 +-
 kernel/bpf/stackmap.c      | 5 ++---
 kernel/events/callchain.c  | 6 +-----
 kernel/events/core.c       | 8 ++++----
 4 files changed, 8 insertions(+), 13 deletions(-)
  

Comments

Namhyung Kim Nov. 11, 2023, 6:11 a.m. UTC | #1
On Wed, Nov 8, 2023 at 4:44 PM Josh Poimboeuf <jpoimboe@kernel.org> wrote:
>
> get_perf_callchain() doesn't support cross-task unwinding, so it doesn't

For only user stacks, but it seems there's no place to get cross-task kernel
stacks too.

> make much sense to have 'crosstask' as an argument.  Instead, have
> perf_callchain() adjust 'user' accordingly.
>
> Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>

Acked-by: Namhyung Kim <namhyung@kernel.org>

Thanks,
Namhyung

> ---
>  include/linux/perf_event.h | 2 +-
>  kernel/bpf/stackmap.c      | 5 ++---
>  kernel/events/callchain.c  | 6 +-----
>  kernel/events/core.c       | 8 ++++----
>  4 files changed, 8 insertions(+), 13 deletions(-)
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index f4b05954076c..2d8fa253b9df 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -1534,7 +1534,7 @@ extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct p
>  extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
>  extern struct perf_callchain_entry *
>  get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
> -                  u32 max_stack, bool crosstask, bool add_mark);
> +                  u32 max_stack, bool add_mark);
>  extern int get_callchain_buffers(int max_stack);
>  extern void put_callchain_buffers(void);
>  extern struct perf_callchain_entry *get_callchain_entry(int *rctx);
> diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
> index b0b0fbff7c18..e4827ca5378d 100644
> --- a/kernel/bpf/stackmap.c
> +++ b/kernel/bpf/stackmap.c
> @@ -294,8 +294,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
>         if (max_depth > sysctl_perf_event_max_stack)
>                 max_depth = sysctl_perf_event_max_stack;
>
> -       trace = get_perf_callchain(regs, kernel, user, max_depth,
> -                                  false, false);
> +       trace = get_perf_callchain(regs, kernel, user, max_depth, false);
>
>         if (unlikely(!trace))
>                 /* couldn't fetch the stack trace */
> @@ -421,7 +420,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
>                 trace = get_callchain_entry_for_task(task, max_depth);
>         else
>                 trace = get_perf_callchain(regs, kernel, user, max_depth,
> -                                          false, false);
> +                                          false);
>         if (unlikely(!trace))
>                 goto err_fault;
>
> diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
> index 1e135195250c..aa5f9d11c28d 100644
> --- a/kernel/events/callchain.c
> +++ b/kernel/events/callchain.c
> @@ -178,7 +178,7 @@ put_callchain_entry(int rctx)
>
>  struct perf_callchain_entry *
>  get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
> -                  u32 max_stack, bool crosstask, bool add_mark)
> +                  u32 max_stack, bool add_mark)
>  {
>         struct perf_callchain_entry *entry;
>         struct perf_callchain_entry_ctx ctx;
> @@ -209,9 +209,6 @@ get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
>                 }
>
>                 if (regs) {
> -                       if (crosstask)
> -                               goto exit_put;
> -
>                         if (add_mark)
>                                 perf_callchain_store_context(&ctx, PERF_CONTEXT_USER);
>
> @@ -219,7 +216,6 @@ get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
>                 }
>         }
>
> -exit_put:
>         put_callchain_entry(rctx);
>
>         return entry;
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index b0d62df7df4e..5e41a3b70bcd 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -7592,16 +7592,16 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
>  {
>         bool kernel = !event->attr.exclude_callchain_kernel;
>         bool user   = !event->attr.exclude_callchain_user;
> -       /* Disallow cross-task user callchains. */
> -       bool crosstask = event->ctx->task && event->ctx->task != current;
>         const u32 max_stack = event->attr.sample_max_stack;
>         struct perf_callchain_entry *callchain;
>
> +       /* Disallow cross-task user callchains. */
> +       user &= !event->ctx->task || event->ctx->task == current;
> +
>         if (!kernel && !user)
>                 return &__empty_callchain;
>
> -       callchain = get_perf_callchain(regs, kernel, user,
> -                                      max_stack, crosstask, true);
> +       callchain = get_perf_callchain(regs, kernel, user, max_stack, true);
>         return callchain ?: &__empty_callchain;
>  }
>
> --
> 2.41.0
>
  
Jordan Rome Nov. 11, 2023, 8:53 p.m. UTC | #2
On 11/11/23 1:11 AM, Namhyung Kim wrote:
> On Wed, Nov 8, 2023 at 4:44 PM Josh Poimboeuf <jpoimboe@kernel.org> wrote:
>>
>> get_perf_callchain() doesn't support cross-task unwinding, so it doesn't
> 
> For only user stacks, but it seems there's no place to get cross-task kernel
> stacks too.
> 

There is bpf_get_task_stack in kernel/bpf/stackmap.c. This can be called
inside of a BPF task iterator, where you can get the kernel stacks
for every task on the host. But as this change points out, this doesn't
work for crosstask user stack unwinding. I have a similar patch that
just exits early in this case:
https://lore.kernel.org/linux-perf-users/20231111172001.1259065-1-linux@jordanrome.com/

Though I'm not opposed to just removing the *crosstask* param
entirely, as a similar check was just added in the bpf tree for
bpf_get_task_stack:
https://lore.kernel.org/bpf/20231108112334.3433136-1-jordalgo@meta.com/

>> make much sense to have 'crosstask' as an argument.  Instead, have
>> perf_callchain() adjust 'user' accordingly.
>>
>> Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
> 
> Acked-by: Namhyung Kim <namhyung@kernel.org>
> 
> Thanks,
> Namhyung
> 
>> ---
>>   include/linux/perf_event.h | 2 +-
>>   kernel/bpf/stackmap.c      | 5 ++---
>>   kernel/events/callchain.c  | 6 +-----
>>   kernel/events/core.c       | 8 ++++----
>>   4 files changed, 8 insertions(+), 13 deletions(-)
>>
>> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
>> index f4b05954076c..2d8fa253b9df 100644
>> --- a/include/linux/perf_event.h
>> +++ b/include/linux/perf_event.h
>> @@ -1534,7 +1534,7 @@ extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct p
>>   extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
>>   extern struct perf_callchain_entry *
>>   get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
>> -                  u32 max_stack, bool crosstask, bool add_mark);
>> +                  u32 max_stack, bool add_mark);
>>   extern int get_callchain_buffers(int max_stack);
>>   extern void put_callchain_buffers(void);
>>   extern struct perf_callchain_entry *get_callchain_entry(int *rctx);
>> diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
>> index b0b0fbff7c18..e4827ca5378d 100644
>> --- a/kernel/bpf/stackmap.c
>> +++ b/kernel/bpf/stackmap.c
>> @@ -294,8 +294,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
>>          if (max_depth > sysctl_perf_event_max_stack)
>>                  max_depth = sysctl_perf_event_max_stack;
>>
>> -       trace = get_perf_callchain(regs, kernel, user, max_depth,
>> -                                  false, false);
>> +       trace = get_perf_callchain(regs, kernel, user, max_depth, false);
>>
>>          if (unlikely(!trace))
>>                  /* couldn't fetch the stack trace */
>> @@ -421,7 +420,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
>>                  trace = get_callchain_entry_for_task(task, max_depth);
>>          else
>>                  trace = get_perf_callchain(regs, kernel, user, max_depth,
>> -                                          false, false);
>> +                                          false);
>>          if (unlikely(!trace))
>>                  goto err_fault;
>>
>> diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
>> index 1e135195250c..aa5f9d11c28d 100644
>> --- a/kernel/events/callchain.c
>> +++ b/kernel/events/callchain.c
>> @@ -178,7 +178,7 @@ put_callchain_entry(int rctx)
>>
>>   struct perf_callchain_entry *
>>   get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
>> -                  u32 max_stack, bool crosstask, bool add_mark)
>> +                  u32 max_stack, bool add_mark)
>>   {
>>          struct perf_callchain_entry *entry;
>>          struct perf_callchain_entry_ctx ctx;
>> @@ -209,9 +209,6 @@ get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
>>                  }
>>
>>                  if (regs) {
>> -                       if (crosstask)
>> -                               goto exit_put;
>> -
>>                          if (add_mark)
>>                                  perf_callchain_store_context(&ctx, PERF_CONTEXT_USER);
>>
>> @@ -219,7 +216,6 @@ get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
>>                  }
>>          }
>>
>> -exit_put:
>>          put_callchain_entry(rctx);
>>
>>          return entry;
>> diff --git a/kernel/events/core.c b/kernel/events/core.c
>> index b0d62df7df4e..5e41a3b70bcd 100644
>> --- a/kernel/events/core.c
>> +++ b/kernel/events/core.c
>> @@ -7592,16 +7592,16 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
>>   {
>>          bool kernel = !event->attr.exclude_callchain_kernel;
>>          bool user   = !event->attr.exclude_callchain_user;
>> -       /* Disallow cross-task user callchains. */
>> -       bool crosstask = event->ctx->task && event->ctx->task != current;
>>          const u32 max_stack = event->attr.sample_max_stack;
>>          struct perf_callchain_entry *callchain;
>>
>> +       /* Disallow cross-task user callchains. */
>> +       user &= !event->ctx->task || event->ctx->task == current;
>> +
>>          if (!kernel && !user)
>>                  return &__empty_callchain;
>>
>> -       callchain = get_perf_callchain(regs, kernel, user,
>> -                                      max_stack, crosstask, true);
>> +       callchain = get_perf_callchain(regs, kernel, user, max_stack, true);
>>          return callchain ?: &__empty_callchain;
>>   }
>>
>> --
>> 2.41.0
>>
  

Patch

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f4b05954076c..2d8fa253b9df 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1534,7 +1534,7 @@  extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct p
 extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
 extern struct perf_callchain_entry *
 get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
-		   u32 max_stack, bool crosstask, bool add_mark);
+		   u32 max_stack, bool add_mark);
 extern int get_callchain_buffers(int max_stack);
 extern void put_callchain_buffers(void);
 extern struct perf_callchain_entry *get_callchain_entry(int *rctx);
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index b0b0fbff7c18..e4827ca5378d 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -294,8 +294,7 @@  BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
 	if (max_depth > sysctl_perf_event_max_stack)
 		max_depth = sysctl_perf_event_max_stack;
 
-	trace = get_perf_callchain(regs, kernel, user, max_depth,
-				   false, false);
+	trace = get_perf_callchain(regs, kernel, user, max_depth, false);
 
 	if (unlikely(!trace))
 		/* couldn't fetch the stack trace */
@@ -421,7 +420,7 @@  static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 		trace = get_callchain_entry_for_task(task, max_depth);
 	else
 		trace = get_perf_callchain(regs, kernel, user, max_depth,
-					   false, false);
+					   false);
 	if (unlikely(!trace))
 		goto err_fault;
 
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index 1e135195250c..aa5f9d11c28d 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -178,7 +178,7 @@  put_callchain_entry(int rctx)
 
 struct perf_callchain_entry *
 get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
-		   u32 max_stack, bool crosstask, bool add_mark)
+		   u32 max_stack, bool add_mark)
 {
 	struct perf_callchain_entry *entry;
 	struct perf_callchain_entry_ctx ctx;
@@ -209,9 +209,6 @@  get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
 		}
 
 		if (regs) {
-			if (crosstask)
-				goto exit_put;
-
 			if (add_mark)
 				perf_callchain_store_context(&ctx, PERF_CONTEXT_USER);
 
@@ -219,7 +216,6 @@  get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
 		}
 	}
 
-exit_put:
 	put_callchain_entry(rctx);
 
 	return entry;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b0d62df7df4e..5e41a3b70bcd 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7592,16 +7592,16 @@  perf_callchain(struct perf_event *event, struct pt_regs *regs)
 {
 	bool kernel = !event->attr.exclude_callchain_kernel;
 	bool user   = !event->attr.exclude_callchain_user;
-	/* Disallow cross-task user callchains. */
-	bool crosstask = event->ctx->task && event->ctx->task != current;
 	const u32 max_stack = event->attr.sample_max_stack;
 	struct perf_callchain_entry *callchain;
 
+	/* Disallow cross-task user callchains. */
+	user &= !event->ctx->task || event->ctx->task == current;
+
 	if (!kernel && !user)
 		return &__empty_callchain;
 
-	callchain = get_perf_callchain(regs, kernel, user,
-				       max_stack, crosstask, true);
+	callchain = get_perf_callchain(regs, kernel, user, max_stack, true);
 	return callchain ?: &__empty_callchain;
 }