[1/2] perf top: Add --branch-history option
Commit Message
Add a --branch-history option to perf top that behaves the same as the
--branch-history option of perf report.
Example:
$ cat tcallf.c
volatile a = 10000, b = 100000, c;
__attribute__((noinline)) f2()
{
c = a / b;
}
__attribute__((noinline)) f1()
{
f2();
f2();
}
main()
{
while (1)
f1();
}
$ gcc -w -g -o tcallf tcallf.c
$ ./tcallf &
[1] 29409
$ perf top -e cycles:u -t $(pidof tcallf) --stdio --no-children --branch-history
PerfTop: 3819 irqs/sec kernel: 0.0% exact: 0.0% lost: 0/0 drop: 0/0 [4000Hz cycles:u], (target_tid: 29409)
--------------------------------------------------------------------------------------------------------------------
49.01% tcallf.c:5 [.] f2 tcallf
|
|--24.91%--f2 tcallf.c:4
| |
| |--17.14%--f1 tcallf.c:11 (cycles:1)
| | f1 tcallf.c:11
| | f2 tcallf.c:6 (cycles:3)
| | f2 tcallf.c:4
| | f1 tcallf.c:10 (cycles:2)
| | f1 tcallf.c:9
| | main tcallf.c:16 (cycles:1)
| | main tcallf.c:16
| | main tcallf.c:16 (cycles:1)
| | main tcallf.c:16
| | f1 tcallf.c:12 (cycles:1)
| | f1 tcallf.c:12
| | f2 tcallf.c:6 (cycles:3)
| | f2 tcallf.c:4
| | f1 tcallf.c:11 (cycles:1 iter:1 avg_cycles:12)
| | f1 tcallf.c:11
| | f2 tcallf.c:6 (cycles:3 iter:1 avg_cycles:12)
| | f2 tcallf.c:4
| | f1 tcallf.c:10 (cycles:2 iter:1 avg_cycles:12)
| |
| --7.78%--f1 tcallf.c:10 (cycles:2)
| f1 tcallf.c:9
| main tcallf.c:16 (cycles:1)
| main tcallf.c:16
| main tcallf.c:16 (cycles:1)
| main tcallf.c:16
| f1 tcallf.c:12 (cycles:1)
| f1 tcallf.c:12
| f2 tcallf.c:6 (cycles:3)
| f2 tcallf.c:4
| f1 tcallf.c:11 (cycles:1)
| f1 tcallf.c:11
| f2 tcallf.c:6 (cycles:3)
| f2 tcallf.c:4
| f1 tcallf.c:10 (cycles:2 iter:1 avg_cycles:12)
| f1 tcallf.c:9
| main tcallf.c:16 (cycles:1 iter:1 avg_cycles:12)
| main tcallf.c:16
| main tcallf.c:16 (cycles:1 iter:1 avg_cycles:12)
...
$ pkill tcallf
[1]+ Terminated ./tcallf
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
---
tools/perf/Documentation/perf-top.txt | 4 ++++
tools/perf/builtin-top.c | 17 +++++++++++++++++
2 files changed, 21 insertions(+)
@@ -254,6 +254,10 @@ Default is to monitor all CPUS.
The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
Note that this feature may not be available on all processors.
+--branch-history::
+ Add the addresses of sampled taken branches to the callstack.
+ This makes it possible to examine the path the program took to reach each sample.
+
--raw-trace::
When displaying traceevent output, do not use print fmt or plugins.
@@ -1437,6 +1437,7 @@ int cmd_top(int argc, const char **argv)
.max_stack = sysctl__max_stack(),
.nr_threads_synthesize = UINT_MAX,
};
+ bool branch_call_mode = false;
struct record_opts *opts = &top.record_opts;
struct target *target = &opts->target;
const char *disassembler_style = NULL, *objdump_path = NULL, *addr2line_path = NULL;
@@ -1551,6 +1552,8 @@ int cmd_top(int argc, const char **argv)
OPT_CALLBACK('j', "branch-filter", &opts->branch_stack,
"branch filter mask", "branch stack filter modes",
parse_branch_stack),
+ OPT_BOOLEAN(0, "branch-history", &branch_call_mode,
+ "add last branch records to call history"),
OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
"Show raw trace event output (do not use print fmt or plugins)"),
OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
@@ -1677,6 +1680,20 @@ int cmd_top(int argc, const char **argv)
goto out_delete_evlist;
}
+ if (branch_call_mode) {
+ if (!opts->branch_stack)
+ opts->branch_stack = PERF_SAMPLE_BRANCH_ANY;
+ symbol_conf.use_callchain = true;
+ callchain_param.key = CCKEY_ADDRESS;
+ callchain_param.branch_callstack = true;
+ callchain_param.enabled = true;
+ if (callchain_param.record_mode == CALLCHAIN_NONE)
+ callchain_param.record_mode = CALLCHAIN_FP;
+ callchain_register_param(&callchain_param);
+ if (!sort_order)
+ sort_order = "srcline,symbol,dso";
+ }
+
if (opts->branch_stack && callchain_param.enabled)
symbol_conf.show_branchflag_count = true;