[v2,4/5] perf kvm: Support sampling guest callchains
Commit Message
This patch provides support for sampling guests' callchains.
The signature of `get_perf_callchain` has been modified to explicitly
specify whether it needs to sample the host or guest callchain.
Based on the context, it will distribute the sampling request to one of
`perf_callchain_user`, `perf_callchain_kernel`, or `perf_callchain_guest`.
The reason for implementing `perf_callchain_user` and
`perf_callchain_kernel` separately is that the kernel may use special
unwinders such as `ORC`. For the guest, however, we only support
stackframe-based unwinding, so the implementation is generic and only
needs separate 32-bit and 64-bit variants.
Signed-off-by: Tianyi Liu <i.pear@outlook.com>
---
arch/x86/events/core.c | 56 +++++++++++++++++++++++++++++++-------
include/linux/perf_event.h | 3 +-
kernel/bpf/stackmap.c | 8 +++---
kernel/events/callchain.c | 27 +++++++++++++++++-
kernel/events/core.c | 7 ++++-
5 files changed, 84 insertions(+), 17 deletions(-)
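With the new signature, each caller states explicitly whether host and/or
guest chains are wanted. For instance, the BPF call sites in the patch
below keep their previous behaviour by requesting host-only sampling:

        /* Host-only sampling: host = true, guest = false */
        trace = get_perf_callchain(regs, 0, kernel, user, true, false,
                                   max_depth, false, false);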
Comments
Hi Tianyi,
kernel test robot noticed the following build warnings:
[auto build test WARNING on 8a749fd1a8720d4619c91c8b6e7528c0a355c0aa]
url: https://github.com/intel-lab-lkp/linux/commits/Tianyi-Liu/KVM-Add-arch-specific-interfaces-for-sampling-guest-callchains/20231008-230042
base: 8a749fd1a8720d4619c91c8b6e7528c0a355c0aa
patch link: https://lore.kernel.org/r/SY4P282MB108433024762F1F292D47C2A9DCFA%40SY4P282MB1084.AUSP282.PROD.OUTLOOK.COM
patch subject: [PATCH v2 4/5] perf kvm: Support sampling guest callchains
config: i386-tinyconfig (https://download.01.org/0day-ci/archive/20231009/202310090338.4PmYjmBS-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231009/202310090338.4PmYjmBS-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202310090338.4PmYjmBS-lkp@intel.com/
All warnings (new ones prefixed by >>):
arch/x86/events/core.c: In function 'perf_callchain_guest32':
>> arch/x86/events/core.c:2784:43: warning: passing argument 1 of 'perf_guest_read_virt' discards 'const' qualifier from pointer target type [-Wdiscarded-qualifiers]
2784 | if (!perf_guest_read_virt(&fp->next_frame, &frame.next_frame,
| ^~~~~~~~~~~~~~~
In file included from arch/x86/events/core.c:15:
include/linux/perf_event.h:1531:41: note: expected 'void *' but argument is of type 'const u32 *' {aka 'const unsigned int *'}
1531 | static inline bool perf_guest_read_virt(void*, void*, unsigned int) { return 0; }
| ^~~~~
arch/x86/events/core.c:2787:43: warning: passing argument 1 of 'perf_guest_read_virt' discards 'const' qualifier from pointer target type [-Wdiscarded-qualifiers]
2787 | if (!perf_guest_read_virt(&fp->return_address, &frame.return_address,
| ^~~~~~~~~~~~~~~~~~~
include/linux/perf_event.h:1531:41: note: expected 'void *' but argument is of type 'const u32 *' {aka 'const unsigned int *'}
1531 | static inline bool perf_guest_read_virt(void*, void*, unsigned int) { return 0; }
| ^~~~~
arch/x86/events/core.c: In function 'perf_callchain_guest':
arch/x86/events/core.c:2808:51: warning: passing argument 1 of 'perf_guest_read_virt' discards 'const' qualifier from pointer target type [-Wdiscarded-qualifiers]
2808 | if (!perf_guest_read_virt(&fp->next_frame, &frame.next_frame,
| ^~~~~~~~~~~~~~~
include/linux/perf_event.h:1531:41: note: expected 'void *' but argument is of type 'struct stack_frame * const*'
1531 | static inline bool perf_guest_read_virt(void*, void*, unsigned int) { return 0; }
| ^~~~~
arch/x86/events/core.c:2811:51: warning: passing argument 1 of 'perf_guest_read_virt' discards 'const' qualifier from pointer target type [-Wdiscarded-qualifiers]
2811 | if (!perf_guest_read_virt(&fp->return_address, &frame.return_address,
| ^~~~~~~~~~~~~~~~~~~
include/linux/perf_event.h:1531:41: note: expected 'void *' but argument is of type 'const long unsigned int *'
1531 | static inline bool perf_guest_read_virt(void*, void*, unsigned int) { return 0; }
| ^~~~~
vim +2784 arch/x86/events/core.c
2775
2776 static inline void
2777 perf_callchain_guest32(struct perf_callchain_entry_ctx *entry)
2778 {
2779 struct stack_frame_ia32 frame;
2780 const struct stack_frame_ia32 *fp;
2781
2782 fp = (void *)perf_guest_get_frame_pointer();
2783 while (fp && entry->nr < entry->max_stack) {
> 2784 if (!perf_guest_read_virt(&fp->next_frame, &frame.next_frame,
2785 sizeof(frame.next_frame)))
2786 break;
2787 if (!perf_guest_read_virt(&fp->return_address, &frame.return_address,
2788 sizeof(frame.return_address)))
2789 break;
2790 perf_callchain_store(entry, frame.return_address);
2791 fp = (void *)frame.next_frame;
2792 }
2793 }
2794
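The warnings above come from the `!CONFIG_GUEST_PERF_EVENTS` stub of
`perf_guest_read_virt` taking a plain `void *` source while the unwinders
pass pointers into `const`-qualified guest frames. One possible fix,
sketched here under the assumption that the source parameter can simply be
constified at both the stub and the real declaration:

        /* Sketch: const-qualify the read source so callers may pass
         * pointers into const guest stack frames; only the destination
         * buffer is written to.
         */
        static inline bool perf_guest_read_virt(const void *addr, void *buf,
                                                unsigned int len)
        {
                return false;
        }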
On Sun, 2023-10-08 at 22:57 +0800, Tianyi Liu wrote:
> This patch provides support for sampling guests' callchains.
>
> The signature of `get_perf_callchain` has been modified to explicitly
> specify whether it needs to sample the host or guest callchain.
> Based on the context, it will distribute the sampling request to one of
> `perf_callchain_user`, `perf_callchain_kernel`, or `perf_callchain_guest`.
>
> The reason for separately implementing `perf_callchain_user` and
> `perf_callchain_kernel` is that the kernel may utilize special unwinders
> such as `ORC`. However, for the guest, we only support stackframe-based
> unwinding, so the implementation is generic and only needs to be
> separately implemented for 32-bit and 64-bit.
>
> Signed-off-by: Tianyi Liu <i.pear@outlook.com>
> ---
> arch/x86/events/core.c | 56 +++++++++++++++++++++++++++++++-------
> include/linux/perf_event.h | 3 +-
> kernel/bpf/stackmap.c | 8 +++---
> kernel/events/callchain.c | 27 +++++++++++++++++-
> kernel/events/core.c | 7 ++++-
> 5 files changed, 84 insertions(+), 17 deletions(-)
>
> diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
> index 185f902e5..ea4c86175 100644
> --- a/arch/x86/events/core.c
> +++ b/arch/x86/events/core.c
> @@ -2758,11 +2758,6 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
> struct unwind_state state;
> unsigned long addr;
>
> - if (perf_guest_state()) {
> - /* TODO: We don't support guest os callchain now */
> - return;
> - }
> -
> if (perf_callchain_store(entry, regs->ip))
> return;
>
> @@ -2778,6 +2773,52 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
> }
> }
>
> +static inline void
> +perf_callchain_guest32(struct perf_callchain_entry_ctx *entry)
> +{
> + struct stack_frame_ia32 frame;
> + const struct stack_frame_ia32 *fp;
> +
> + fp = (void *)perf_guest_get_frame_pointer();
> + while (fp && entry->nr < entry->max_stack) {
> + if (!perf_guest_read_virt(&fp->next_frame, &frame.next_frame,
This should be fp->next_frame.
> + sizeof(frame.next_frame)))
> + break;
> + if (!perf_guest_read_virt(&fp->return_address, &frame.return_address,
Same here.
> + sizeof(frame.return_address)))
> + break;
> + perf_callchain_store(entry, frame.return_address);
> + fp = (void *)frame.next_frame;
> + }
> +}
> +
> +void
> +perf_callchain_guest(struct perf_callchain_entry_ctx *entry)
> +{
> + struct stack_frame frame;
> + const struct stack_frame *fp;
> + unsigned int guest_state;
> +
> + guest_state = perf_guest_state();
> + perf_callchain_store(entry, perf_guest_get_ip());
> +
> + if (guest_state & PERF_GUEST_64BIT) {
> + fp = (void *)perf_guest_get_frame_pointer();
> + while (fp && entry->nr < entry->max_stack) {
> + if (!perf_guest_read_virt(&fp->next_frame, &frame.next_frame,
Same here.
> + sizeof(frame.next_frame)))
> + break;
> + if (!perf_guest_read_virt(&fp->return_address, &frame.return_address,
And here.
> + sizeof(frame.return_address)))
> + break;
> + perf_callchain_store(entry, frame.return_address);
> + fp = (void *)frame.next_frame;
> + }
> + } else {
> + perf_callchain_guest32(entry);
> + }
> +}
For symmetry, maybe it makes sense to have perf_callchain_guest32 and perf_callchain_guest64
and then make perf_callchain_guest call each? No strong opinion on this of course.
> +
> static inline int
> valid_user_frame(const void __user *fp, unsigned long size)
> {
> @@ -2861,11 +2902,6 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
> struct stack_frame frame;
> const struct stack_frame __user *fp;
>
> - if (perf_guest_state()) {
> - /* TODO: We don't support guest os callchain now */
> - return;
> - }
> -
> /*
> * We don't know what to do with VM86 stacks.. ignore them for now.
> */
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index d0f937a62..a2baf4856 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -1545,9 +1545,10 @@ DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
>
> extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
> extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
> +extern void perf_callchain_guest(struct perf_callchain_entry_ctx *entry);
> extern struct perf_callchain_entry *
> get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
> - u32 max_stack, bool crosstask, bool add_mark);
> + bool host, bool guest, u32 max_stack, bool crosstask, bool add_mark);
> extern int get_callchain_buffers(int max_stack);
> extern void put_callchain_buffers(void);
> extern struct perf_callchain_entry *get_callchain_entry(int *rctx);
> diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
> index 458bb80b1..2e88d4639 100644
> --- a/kernel/bpf/stackmap.c
> +++ b/kernel/bpf/stackmap.c
> @@ -294,8 +294,8 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
> if (max_depth > sysctl_perf_event_max_stack)
> max_depth = sysctl_perf_event_max_stack;
>
> - trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
> - false, false);
> + trace = get_perf_callchain(regs, 0, kernel, user, true, false,
> + max_depth, false, false);
>
> if (unlikely(!trace))
> /* couldn't fetch the stack trace */
> @@ -420,8 +420,8 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
> else if (kernel && task)
> trace = get_callchain_entry_for_task(task, max_depth);
> else
> - trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
> - false, false);
> + trace = get_perf_callchain(regs, 0, kernel, user, true, false,
> + max_depth, false, false);
> if (unlikely(!trace))
> goto err_fault;
>
> diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
> index 1273be843..7e80729e9 100644
> --- a/kernel/events/callchain.c
> +++ b/kernel/events/callchain.c
> @@ -45,6 +45,10 @@ __weak void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
> {
> }
>
> +__weak void perf_callchain_guest(struct perf_callchain_entry_ctx *entry)
> +{
> +}
> +
> static void release_callchain_buffers_rcu(struct rcu_head *head)
> {
> struct callchain_cpus_entries *entries;
> @@ -178,11 +182,12 @@ put_callchain_entry(int rctx)
>
> struct perf_callchain_entry *
> get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
> - u32 max_stack, bool crosstask, bool add_mark)
> + bool host, bool guest, u32 max_stack, bool crosstask, bool add_mark)
> {
> struct perf_callchain_entry *entry;
> struct perf_callchain_entry_ctx ctx;
> int rctx;
> + unsigned int guest_state;
>
> entry = get_callchain_entry(&rctx);
> if (!entry)
> @@ -194,6 +199,26 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
> ctx.contexts = 0;
> ctx.contexts_maxed = false;
>
> + guest_state = perf_guest_state();
> + if (guest_state) {
> + if (!guest)
> + goto exit_put;
> + if (user && (guest_state & PERF_GUEST_USER)) {
> + if (add_mark)
> + perf_callchain_store_context(&ctx, PERF_CONTEXT_GUEST_USER);
> + perf_callchain_guest(&ctx);
> + }
> + if (kernel && !(guest_state & PERF_GUEST_USER)) {
> + if (add_mark)
> + perf_callchain_store_context(&ctx, PERF_CONTEXT_GUEST_KERNEL);
> + perf_callchain_guest(&ctx);
> + }
> + goto exit_put;
> + }
> +
> + if (unlikely(!host))
> + goto exit_put;
> +
> if (kernel && !user_mode(regs)) {
> if (add_mark)
> perf_callchain_store_context(&ctx, PERF_CONTEXT_KERNEL);
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index eaba00ec2..b3401f403 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -7559,6 +7559,8 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
> {
> bool kernel = !event->attr.exclude_callchain_kernel;
> bool user = !event->attr.exclude_callchain_user;
> + bool host = !event->attr.exclude_host;
> + bool guest = !event->attr.exclude_guest;
> /* Disallow cross-task user callchains. */
> bool crosstask = event->ctx->task && event->ctx->task != current;
> const u32 max_stack = event->attr.sample_max_stack;
> @@ -7567,7 +7569,10 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
> if (!kernel && !user)
> return &__empty_callchain;
>
> - callchain = get_perf_callchain(regs, 0, kernel, user,
> + if (!host && !guest)
> + return &__empty_callchain;
> +
> + callchain = get_perf_callchain(regs, 0, kernel, user, host, guest,
> max_stack, crosstask, true);
> return callchain ?: &__empty_callchain;
> }
Best regards,
Maxim Levitsky
Hi Maxim,
At 2023-10-10 16:12 +0000, Maxim Levitsky wrote:
> > +static inline void
> > +perf_callchain_guest32(struct perf_callchain_entry_ctx *entry)
> > +{
> > + struct stack_frame_ia32 frame;
> > + const struct stack_frame_ia32 *fp;
> > +
> > + fp = (void *)perf_guest_get_frame_pointer();
> > + while (fp && entry->nr < entry->max_stack) {
> > + if (!perf_guest_read_virt(&fp->next_frame, &frame.next_frame,
> This should be fp->next_frame.
> > + sizeof(frame.next_frame)))
> > + break;
> > + if (!perf_guest_read_virt(&fp->return_address, &frame.return_address,
> Same here.
> > + sizeof(frame.return_address)))
> > + break;
> > + perf_callchain_store(entry, frame.return_address);
> > + fp = (void *)frame.next_frame;
> > + }
> > +}
> > +
Here, `fp` holds a guest virtual address, not a pointer into the
directly accessible kernel address space. `&fp->next_frame` and
`&fp->return_address` simply compute address offsets in a more
readable manner, much like `fp + 0` and `fp + 4`.
The original implementations of `perf_callchain_user` and
`perf_callchain_user32` also use this approach [1].
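To illustrate with the actual layout of `struct stack_frame_ia32` (two
`u32` members), both expressions are pure pointer arithmetic; snippet for
illustration only:

        const struct stack_frame_ia32 *fp =
                (void *)perf_guest_get_frame_pointer();
        /* Pure pointer arithmetic on the guest virtual address in fp;
         * nothing is dereferenced on the host here:
         */
        const u32 *next = &fp->next_frame;      /* fp + 0 */
        const u32 *ret  = &fp->return_address;  /* fp + 4 */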
>
> For symmetry, maybe it makes sense to have perf_callchain_guest32 and perf_callchain_guest64
> and then make perf_callchain_guest call each? No strong opinion on this of course.
>
The `perf_callchain_guest` and `perf_callchain_guest32` here are simply
designed to mimic `perf_callchain_user` and `perf_callchain_user32` [2].
I'm also open to making the logic fully separate if that seems more
elegant, e.g. along the lines of the sketch below.
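A rough sketch of what that fully symmetric layout could look like
(untested; `perf_callchain_guest64` is just a suggested name, and the
bodies only redistribute the code already in the patch):

        static inline void
        perf_callchain_guest64(struct perf_callchain_entry_ctx *entry)
        {
                struct stack_frame frame;
                const struct stack_frame *fp;

                fp = (void *)perf_guest_get_frame_pointer();
                while (fp && entry->nr < entry->max_stack) {
                        if (!perf_guest_read_virt(&fp->next_frame, &frame.next_frame,
                                                  sizeof(frame.next_frame)))
                                break;
                        if (!perf_guest_read_virt(&fp->return_address, &frame.return_address,
                                                  sizeof(frame.return_address)))
                                break;
                        perf_callchain_store(entry, frame.return_address);
                        fp = (void *)frame.next_frame;
                }
        }

        void perf_callchain_guest(struct perf_callchain_entry_ctx *entry)
        {
                perf_callchain_store(entry, perf_guest_get_ip());

                /* Dispatch once on the guest mode, mirroring the
                 * perf_callchain_user()/perf_callchain_user32() split.
                 */
                if (perf_guest_state() & PERF_GUEST_64BIT)
                        perf_callchain_guest64(entry);
                else
                        perf_callchain_guest32(entry);
        }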
[1] https://github.com/torvalds/linux/blob/master/arch/x86/events/core.c#L2890
[2] https://github.com/torvalds/linux/blob/master/arch/x86/events/core.c#L2820
Best regards,
Tianyi Liu
Hi Tianyi,
kernel test robot noticed the following build warnings:
[auto build test WARNING on 8a749fd1a8720d4619c91c8b6e7528c0a355c0aa]
url: https://github.com/intel-lab-lkp/linux/commits/Tianyi-Liu/KVM-Add-arch-specific-interfaces-for-sampling-guest-callchains/20231008-230042
base: 8a749fd1a8720d4619c91c8b6e7528c0a355c0aa
patch link: https://lore.kernel.org/r/SY4P282MB108433024762F1F292D47C2A9DCFA%40SY4P282MB1084.AUSP282.PROD.OUTLOOK.COM
patch subject: [PATCH v2 4/5] perf kvm: Support sampling guest callchains
config: i386-randconfig-061-20231012 (https://download.01.org/0day-ci/archive/20231013/202310130419.cIkNaYZm-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231013/202310130419.cIkNaYZm-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202310130419.cIkNaYZm-lkp@intel.com/
sparse warnings: (new ones prefixed by >>)
>> arch/x86/events/core.c:2808:52: sparse: sparse: incorrect type in argument 1 (different modifiers) @@ expected void *addr @@ got struct stack_frame *const * @@
arch/x86/events/core.c:2808:52: sparse: expected void *addr
arch/x86/events/core.c:2808:52: sparse: got struct stack_frame *const *
>> arch/x86/events/core.c:2811:52: sparse: sparse: incorrect type in argument 1 (different modifiers) @@ expected void *addr @@ got unsigned long const * @@
arch/x86/events/core.c:2811:52: sparse: expected void *addr
arch/x86/events/core.c:2811:52: sparse: got unsigned long const *
>> arch/x86/events/core.c:2784:44: sparse: sparse: incorrect type in argument 1 (different modifiers) @@ expected void *addr @@ got unsigned int const * @@
arch/x86/events/core.c:2784:44: sparse: expected void *addr
arch/x86/events/core.c:2784:44: sparse: got unsigned int const *
arch/x86/events/core.c:2787:44: sparse: sparse: incorrect type in argument 1 (different modifiers) @@ expected void *addr @@ got unsigned int const * @@
arch/x86/events/core.c:2787:44: sparse: expected void *addr
arch/x86/events/core.c:2787:44: sparse: got unsigned int const *
vim +2808 arch/x86/events/core.c
2775
2776 static inline void
2777 perf_callchain_guest32(struct perf_callchain_entry_ctx *entry)
2778 {
2779 struct stack_frame_ia32 frame;
2780 const struct stack_frame_ia32 *fp;
2781
2782 fp = (void *)perf_guest_get_frame_pointer();
2783 while (fp && entry->nr < entry->max_stack) {
> 2784 if (!perf_guest_read_virt(&fp->next_frame, &frame.next_frame,
2785 sizeof(frame.next_frame)))
2786 break;
2787 if (!perf_guest_read_virt(&fp->return_address, &frame.return_address,
2788 sizeof(frame.return_address)))
2789 break;
2790 perf_callchain_store(entry, frame.return_address);
2791 fp = (void *)frame.next_frame;
2792 }
2793 }
2794
2795 void
2796 perf_callchain_guest(struct perf_callchain_entry_ctx *entry)
2797 {
2798 struct stack_frame frame;
2799 const struct stack_frame *fp;
2800 unsigned int guest_state;
2801
2802 guest_state = perf_guest_state();
2803 perf_callchain_store(entry, perf_guest_get_ip());
2804
2805 if (guest_state & PERF_GUEST_64BIT) {
2806 fp = (void *)perf_guest_get_frame_pointer();
2807 while (fp && entry->nr < entry->max_stack) {
> 2808 if (!perf_guest_read_virt(&fp->next_frame, &frame.next_frame,
2809 sizeof(frame.next_frame)))
2810 break;
> 2811 if (!perf_guest_read_virt(&fp->return_address, &frame.return_address,
2812 sizeof(frame.return_address)))
2813 break;
2814 perf_callchain_store(entry, frame.return_address);
2815 fp = (void *)frame.next_frame;
2816 }
2817 } else {
2818 perf_callchain_guest32(entry);
2819 }
2820 }
2821