[3/4] perf lock contention: Show per-cpu rq_lock with address
Commit Message
Using the BPF_PROG_RUN mechanism, we can run a raw_tp BPF program to
collect some semi-global locks like per-cpu locks. Let's add runqueue
locks using bpf_per_cpu_ptr() helper.
$ sudo ./perf lock con -abl -- sleep 1
contended total wait max wait avg wait address symbol
248 3.25 ms 32.23 us 13.10 us ffff8cc75cfd2940 siglock
60 217.91 us 9.69 us 3.63 us ffff8cc700061c00
8 70.23 us 13.86 us 8.78 us ffff8cc703629484
4 56.32 us 35.81 us 14.08 us ffff8cc78b66f778 mmap_lock
4 16.70 us 5.18 us 4.18 us ffff8cc7036a0684
3 4.99 us 2.65 us 1.66 us ffff8d053da30c80 rq_lock
2 3.44 us 2.28 us 1.72 us ffff8d053dcf0c80 rq_lock
9 2.51 us 371 ns 278 ns ffff8ccb92479440
2 2.11 us 1.24 us 1.06 us ffff8d053db30c80 rq_lock
2 2.06 us 1.69 us 1.03 us ffff8d053d970c80 rq_lock
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
tools/perf/util/bpf_lock_contention.c | 27 ++++++++++++++--
.../perf/util/bpf_skel/lock_contention.bpf.c | 31 +++++++++++++++++++
tools/perf/util/bpf_skel/lock_data.h | 5 +++
3 files changed, 61 insertions(+), 2 deletions(-)
@@ -151,6 +151,8 @@ int lock_contention_prepare(struct lock_contention *con)
skel->bss->needs_callstack = con->save_callstack;
skel->bss->lock_owner = con->owner;
+ bpf_program__set_autoload(skel->progs.collect_lock_syms, false);
+
lock_contention_bpf__attach(skel);
return 0;
}
@@ -198,14 +200,26 @@ static const char *lock_contention_get_name(struct lock_contention *con,
}
if (con->aggr_mode == LOCK_AGGR_ADDR) {
+ int lock_fd = bpf_map__fd(skel->maps.lock_syms);
+
+ /* per-process locks set upper bits of the flags */
if (flags & LCD_F_MMAP_LOCK)
return "mmap_lock";
if (flags & LCD_F_SIGHAND_LOCK)
return "siglock";
+
+ /* global locks with symbols */
sym = machine__find_kernel_symbol(machine, key->lock_addr, &kmap);
if (sym)
- name = sym->name;
- return name;
+ return sym->name;
+
+ /* try semi-global locks collected separately */
+ if (!bpf_map_lookup_elem(lock_fd, &key->lock_addr, &flags)) {
+ if (flags == LOCK_CLASS_RQLOCK)
+ return "rq_lock";
+ }
+
+ return "";
}
/* LOCK_AGGR_CALLER: skip lock internal functions */
@@ -258,6 +272,15 @@ int lock_contention_read(struct lock_contention *con)
thread__set_comm(idle, "swapper", /*timestamp=*/0);
}
+ if (con->aggr_mode == LOCK_AGGR_ADDR) {
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .flags = BPF_F_TEST_RUN_ON_CPU,
+ );
+ int prog_fd = bpf_program__fd(skel->progs.collect_lock_syms);
+
+ bpf_prog_test_run_opts(prog_fd, &opts);
+ }
+
/* make sure it loads the kernel map */
map__load(maps__first(machine->kmaps));
@@ -10,6 +10,9 @@
/* default buffer size */
#define MAX_ENTRIES 10240
+/* for collect_lock_syms(). 4096 was rejected by the verifier */
+#define MAX_CPUS 1024
+
/* lock contention flags from include/trace/events/lock.h */
#define LCB_F_SPIN (1U << 0)
#define LCB_F_READ (1U << 1)
@@ -56,6 +59,13 @@ struct {
__uint(max_entries, MAX_ENTRIES);
} task_data SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u64));
+ __uint(value_size, sizeof(__u32));
+ __uint(max_entries, 16384);
+} lock_syms SEC(".maps");
+
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(key_size, sizeof(__u32));
@@ -378,4 +388,25 @@ int contention_end(u64 *ctx)
return 0;
}
+extern struct rq runqueues __ksym;
+
+SEC("raw_tp/bpf_test_finish")
+int BPF_PROG(collect_lock_syms)
+{
+ __u64 lock_addr;
+ __u32 lock_flag;
+
+ for (int i = 0; i < MAX_CPUS; i++) {
+ struct rq *rq = bpf_per_cpu_ptr(&runqueues, i);
+
+ if (rq == NULL)
+ break;
+
+ lock_addr = (__u64)&rq->__lock;
+ lock_flag = LOCK_CLASS_RQLOCK;
+ bpf_map_update_elem(&lock_syms, &lock_addr, &lock_flag, BPF_ANY);
+ }
+ return 0;
+}
+
char LICENSE[] SEC("license") = "Dual BSD/GPL";
@@ -36,4 +36,9 @@ enum lock_aggr_mode {
LOCK_AGGR_CALLER,
};
+enum lock_class_sym {
+ LOCK_CLASS_NONE,
+ LOCK_CLASS_RQLOCK,
+};
+
#endif /* UTIL_BPF_SKEL_LOCK_DATA_H */