[1/1] perf trace: Collect sys_nanosleep first argument

Message ID ZdYtJs8nWt6ALprh@x1
State New
Headers
Series [1/1] perf trace: Collect sys_nanosleep first argument |

Commit Message

Arnaldo Carvalho de Melo Feb. 21, 2024, 5:04 p.m. UTC
  That is a 'struct timespec' passed from userspace to the kernel as we
can see with a system wide syscall tracing:

  root@number:~# perf trace -e nanosleep
       0.000 (10.102 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
      38.924 (10.077 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
     100.177 (10.107 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
     139.171 (10.063 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
     200.603 (10.105 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
     239.399 (10.064 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
     300.994 (10.096 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
     339.584 (10.067 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
     401.335 (10.057 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
     439.758 (10.166 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
     501.814 (10.110 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
     539.983 (10.227 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
     602.284 (10.199 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
     640.208 (10.105 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
     702.662 (10.163 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
     740.440 (10.107 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
     802.993 (10.159 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
  ^Croot@number:~# strace -p 9150 -e nanosleep

If we then use the ptrace method to look at that podman process:

  root@number:~# strace -p 9150 -e nanosleep
  strace: Process 9150 attached
  nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
  nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
  nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
  nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
  nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
  nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
  nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
  ^Cstrace: Process 9150 detached
  root@number:~#

With some changes we can get something closer to the strace output,
still in system wide mode:

  root@number:~# perf config trace.show_arg_names=false
  root@number:~# perf config trace.show_duration=false
  root@number:~# perf config trace.show_timestamp=false
  root@number:~# perf config trace.show_zeros=true
  root@number:~# perf config trace.args_alignment=0
  root@number:~# perf trace -e nanosleep --max-events=10
  podman/2195174 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
  podman/9150 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
  podman/2195174 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
  podman/9150 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
  podman/2195174 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
  podman/9150 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
  podman/2195174 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
  podman/9150 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
  podman/2195174 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
  podman/9150 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
  root@number:~#
  root@number:~# perf config
  trace.show_arg_names=false
  trace.show_duration=false
  trace.show_timestamp=false
  trace.show_zeros=true
  trace.args_alignment=0
  root@number:~# cat ~/.perfconfig
  # this file is auto-generated.
  [trace]
  	show_arg_names = false
  	show_duration = false
  	show_timestamp = false
  	show_zeros = true
  	args_alignment = 0
  root@number:~#

This will not get reused by any other syscall as nanosleep is the only
one to have as its first argument a 'struct timespec" pointer argument
passed from userspace to the kernel:

  root@number:~# grep timespec /sys/kernel/tracing/events/syscalls/sys_enter_*/format | grep offset:16
  /sys/kernel/tracing/events/syscalls/sys_enter_nanosleep/format:	field:struct __kernel_timespec * rqtp;	offset:16;	size:8;	signed:0;
  root@number:~#

BTF based pretty printing will simplify all this, but then lets just get
the low hanging fruits first.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c                    |  2 ++
 .../bpf_skel/augmented_raw_syscalls.bpf.c     | 21 +++++++++++++++++++
 2 files changed, 23 insertions(+)
  

Comments

Ian Rogers Feb. 21, 2024, 5:15 p.m. UTC | #1
On Wed, Feb 21, 2024 at 9:04 AM Arnaldo Carvalho de Melo
<acme@kernel.org> wrote:
>
> That is a 'struct timespec' passed from userspace to the kernel as we
> can see with a system wide syscall tracing:
>
>   root@number:~# perf trace -e nanosleep
>        0.000 (10.102 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>       38.924 (10.077 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      100.177 (10.107 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      139.171 (10.063 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      200.603 (10.105 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      239.399 (10.064 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      300.994 (10.096 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      339.584 (10.067 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      401.335 (10.057 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      439.758 (10.166 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      501.814 (10.110 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      539.983 (10.227 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      602.284 (10.199 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      640.208 (10.105 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      702.662 (10.163 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      740.440 (10.107 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      802.993 (10.159 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>   ^Croot@number:~# strace -p 9150 -e nanosleep
>
> If we then use the ptrace method to look at that podman process:
>
>   root@number:~# strace -p 9150 -e nanosleep
>   strace: Process 9150 attached
>   nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
>   nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
>   nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
>   nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
>   nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
>   nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
>   nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
>   ^Cstrace: Process 9150 detached
>   root@number:~#
>
> With some changes we can get something closer to the strace output,
> still in system wide mode:
>
>   root@number:~# perf config trace.show_arg_names=false
>   root@number:~# perf config trace.show_duration=false
>   root@number:~# perf config trace.show_timestamp=false
>   root@number:~# perf config trace.show_zeros=true
>   root@number:~# perf config trace.args_alignment=0
>   root@number:~# perf trace -e nanosleep --max-events=10
>   podman/2195174 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
>   podman/9150 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
>   podman/2195174 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
>   podman/9150 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
>   podman/2195174 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
>   podman/9150 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
>   podman/2195174 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
>   podman/9150 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
>   podman/2195174 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
>   podman/9150 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
>   root@number:~#
>   root@number:~# perf config
>   trace.show_arg_names=false
>   trace.show_duration=false
>   trace.show_timestamp=false
>   trace.show_zeros=true
>   trace.args_alignment=0
>   root@number:~# cat ~/.perfconfig
>   # this file is auto-generated.
>   [trace]
>         show_arg_names = false
>         show_duration = false
>         show_timestamp = false
>         show_zeros = true
>         args_alignment = 0
>   root@number:~#
>
> This will not get reused by any other syscall as nanosleep is the only
> one to have as its first argument a 'struct timespec" pointer argument
> passed from userspace to the kernel:
>
>   root@number:~# grep timespec /sys/kernel/tracing/events/syscalls/sys_enter_*/format | grep offset:16
>   /sys/kernel/tracing/events/syscalls/sys_enter_nanosleep/format:       field:struct __kernel_timespec * rqtp;  offset:16;      size:8; signed:0;
>   root@number:~#
>
> BTF based pretty printing will simplify all this, but then lets just get
> the low hanging fruits first.
>
> Cc: Adrian Hunter <adrian.hunter@intel.com>
> Cc: Ian Rogers <irogers@google.com>
> Cc: Jiri Olsa <jolsa@kernel.org>
> Cc: Namhyung Kim <namhyung@kernel.org>
> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

v2? https://lore.kernel.org/lkml/Zbq72dJRpOlfRWnf@kernel.org/

Thanks,
Ian
  
Arnaldo Carvalho de Melo Feb. 21, 2024, 5:21 p.m. UTC | #2
On Wed, Feb 21, 2024 at 09:15:01AM -0800, Ian Rogers wrote:
> On Wed, Feb 21, 2024 at 9:04 AM Arnaldo Carvalho de Melo
> <acme@kernel.org> wrote:
> >
> > That is a 'struct timespec' passed from userspace to the kernel as we
> > can see with a system wide syscall tracing:
> >
> >   root@number:~# perf trace -e nanosleep
> >        0.000 (10.102 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
> >       38.924 (10.077 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
> >      100.177 (10.107 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
> >      139.171 (10.063 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
> >      200.603 (10.105 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
> >      239.399 (10.064 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
> >      300.994 (10.096 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
> >      339.584 (10.067 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
> >      401.335 (10.057 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
> >      439.758 (10.166 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
> >      501.814 (10.110 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
> >      539.983 (10.227 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
> >      602.284 (10.199 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
> >      640.208 (10.105 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
> >      702.662 (10.163 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
> >      740.440 (10.107 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
> >      802.993 (10.159 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
> >   ^Croot@number:~# strace -p 9150 -e nanosleep
> >
> > If we then use the ptrace method to look at that podman process:
> >
> >   root@number:~# strace -p 9150 -e nanosleep
> >   strace: Process 9150 attached
> >   nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
> >   nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
> >   nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
> >   nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
> >   nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
> >   nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
> >   nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
> >   ^Cstrace: Process 9150 detached
> >   root@number:~#
> >
> > With some changes we can get something closer to the strace output,
> > still in system wide mode:
> >
> >   root@number:~# perf config trace.show_arg_names=false
> >   root@number:~# perf config trace.show_duration=false
> >   root@number:~# perf config trace.show_timestamp=false
> >   root@number:~# perf config trace.show_zeros=true
> >   root@number:~# perf config trace.args_alignment=0
> >   root@number:~# perf trace -e nanosleep --max-events=10
> >   podman/2195174 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
> >   podman/9150 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
> >   podman/2195174 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
> >   podman/9150 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
> >   podman/2195174 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
> >   podman/9150 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
> >   podman/2195174 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
> >   podman/9150 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
> >   podman/2195174 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
> >   podman/9150 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
> >   root@number:~#
> >   root@number:~# perf config
> >   trace.show_arg_names=false
> >   trace.show_duration=false
> >   trace.show_timestamp=false
> >   trace.show_zeros=true
> >   trace.args_alignment=0
> >   root@number:~# cat ~/.perfconfig
> >   # this file is auto-generated.
> >   [trace]
> >         show_arg_names = false
> >         show_duration = false
> >         show_timestamp = false
> >         show_zeros = true
> >         args_alignment = 0
> >   root@number:~#
> >
> > This will not get reused by any other syscall as nanosleep is the only
> > one to have as its first argument a 'struct timespec" pointer argument
> > passed from userspace to the kernel:
> >
> >   root@number:~# grep timespec /sys/kernel/tracing/events/syscalls/sys_enter_*/format | grep offset:16
> >   /sys/kernel/tracing/events/syscalls/sys_enter_nanosleep/format:       field:struct __kernel_timespec * rqtp;  offset:16;      size:8; signed:0;
> >   root@number:~#
> >
> > BTF based pretty printing will simplify all this, but then lets just get
> > the low hanging fruits first.
> >
> > Cc: Adrian Hunter <adrian.hunter@intel.com>
> > Cc: Ian Rogers <irogers@google.com>
> > Cc: Jiri Olsa <jolsa@kernel.org>
> > Cc: Namhyung Kim <namhyung@kernel.org>
> > Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
> 
> v2? https://lore.kernel.org/lkml/Zbq72dJRpOlfRWnf@kernel.org/

Nope, I thought I hadn't submitted and when doing a

  git rebase perf-tools-next/perf-tools-next


It was rebased, so I thought I really hadn't submitted it and sent it.

- Arnaldo
  

Patch

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 109b8e64fe69ae32..6abe280dc38f1921 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1048,6 +1048,8 @@  static const struct syscall_fmt syscall_fmts[] = {
 	  .arg = { [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ }, }, },
 	{ .name	    = "name_to_handle_at",
 	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
+	{ .name	    = "nanosleep",
+	  .arg = { [0] = { .scnprintf = SCA_TIMESPEC,  /* req */ }, }, },
 	{ .name	    = "newfstatat",
 	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
 	{ .name	    = "open",
diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
index 2872f9bc07850bd7..0acbd74e8c760956 100644
--- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
+++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
@@ -341,6 +341,27 @@  int sys_enter_clock_nanosleep(struct syscall_enter_args *args)
 	return 1; /* Failure: don't filter */
 }
 
+SEC("tp/syscalls/sys_enter_nanosleep")
+int sys_enter_nanosleep(struct syscall_enter_args *args)
+{
+	struct augmented_args_payload *augmented_args = augmented_args_payload();
+	const void *req_arg = (const void *)args->args[0];
+	unsigned int len = sizeof(augmented_args->args);
+	__u32 size = sizeof(struct timespec64);
+
+        if (augmented_args == NULL)
+		goto failure;
+
+	if (size > sizeof(augmented_args->__data))
+                goto failure;
+
+	bpf_probe_read_user(&augmented_args->__data, size, req_arg);
+
+	return augmented__output(args, augmented_args, len + size);
+failure:
+	return 1; /* Failure: don't filter */
+}
+
 static pid_t getpid(void)
 {
 	return bpf_get_current_pid_tgid();