[1/2] perf top: Add --branch-history option

Message ID 20230330131833.12864-2-adrian.hunter@intel.com
State New
Headers
Series perf top: Add --branch-history option |

Commit Message

Adrian Hunter March 30, 2023, 1:18 p.m. UTC
  Add --branch-history option, to act the same as that option does for
perf report.

Example:

  $ cat tcallf.c
  volatile a = 10000, b = 100000, c;

  __attribute__((noinline)) f2()
  {
          c = a / b;
  }

  __attribute__((noinline)) f1()
  {
          f2();
          f2();
  }
  main()
  {
          while (1)
                  f1();
  }
  $ gcc -w -g -o tcallf tcallf.c
  $ ./tcallf &
  [1] 29409
  $ perf top -e cycles:u  -t $(pidof tcallf) --stdio --no-children --branch-history
     PerfTop:    3819 irqs/sec  kernel: 0.0%  exact:  0.0% lost: 0/0 drop: 0/0 [4000Hz cycles:u],  (target_tid: 29409)
  --------------------------------------------------------------------------------------------------------------------

      49.01%  tcallf.c:5   [.] f2    tcallf
              |
              |--24.91%--f2 tcallf.c:4
              |          |
              |          |--17.14%--f1 tcallf.c:11 (cycles:1)
              |          |          f1 tcallf.c:11
              |          |          f2 tcallf.c:6 (cycles:3)
              |          |          f2 tcallf.c:4
              |          |          f1 tcallf.c:10 (cycles:2)
              |          |          f1 tcallf.c:9
              |          |          main tcallf.c:16 (cycles:1)
              |          |          main tcallf.c:16
              |          |          main tcallf.c:16 (cycles:1)
              |          |          main tcallf.c:16
              |          |          f1 tcallf.c:12 (cycles:1)
              |          |          f1 tcallf.c:12
              |          |          f2 tcallf.c:6 (cycles:3)
              |          |          f2 tcallf.c:4
              |          |          f1 tcallf.c:11 (cycles:1 iter:1 avg_cycles:12)
              |          |          f1 tcallf.c:11
              |          |          f2 tcallf.c:6 (cycles:3 iter:1 avg_cycles:12)
              |          |          f2 tcallf.c:4
              |          |          f1 tcallf.c:10 (cycles:2 iter:1 avg_cycles:12)
              |          |
              |           --7.78%--f1 tcallf.c:10 (cycles:2)
              |                     f1 tcallf.c:9
              |                     main tcallf.c:16 (cycles:1)
              |                     main tcallf.c:16
              |                     main tcallf.c:16 (cycles:1)
              |                     main tcallf.c:16
              |                     f1 tcallf.c:12 (cycles:1)
              |                     f1 tcallf.c:12
              |                     f2 tcallf.c:6 (cycles:3)
              |                     f2 tcallf.c:4
              |                     f1 tcallf.c:11 (cycles:1)
              |                     f1 tcallf.c:11
              |                     f2 tcallf.c:6 (cycles:3)
              |                     f2 tcallf.c:4
              |                     f1 tcallf.c:10 (cycles:2 iter:1 avg_cycles:12)
              |                     f1 tcallf.c:9
              |                     main tcallf.c:16 (cycles:1 iter:1 avg_cycles:12)
              |                     main tcallf.c:16
              |                     main tcallf.c:16 (cycles:1 iter:1 avg_cycles:12)
  ...

  $ pkill tcallf
  [1]+  Terminated              ./tcallf

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
---
 tools/perf/Documentation/perf-top.txt |  4 ++++
 tools/perf/builtin-top.c              | 17 +++++++++++++++++
 2 files changed, 21 insertions(+)
  

Patch

diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 619cc8143ad5..3c202ec080ba 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -254,6 +254,10 @@  Default is to monitor all CPUS.
 	The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
 	Note that this feature may not be available on all processors.
 
+--branch-history::
+	Add the addresses of sampled taken branches to the callstack.
+	This allows to examine the path the program took to each sample.
+
 --raw-trace::
 	When displaying traceevent output, do not use print fmt or plugins.
 
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 82c6c065830d..2c985cfea517 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1437,6 +1437,7 @@  int cmd_top(int argc, const char **argv)
 		.max_stack	     = sysctl__max_stack(),
 		.nr_threads_synthesize = UINT_MAX,
 	};
+	bool branch_call_mode = false;
 	struct record_opts *opts = &top.record_opts;
 	struct target *target = &opts->target;
 	const char *disassembler_style = NULL, *objdump_path = NULL, *addr2line_path = NULL;
@@ -1551,6 +1552,8 @@  int cmd_top(int argc, const char **argv)
 	OPT_CALLBACK('j', "branch-filter", &opts->branch_stack,
 		     "branch filter mask", "branch stack filter modes",
 		     parse_branch_stack),
+	OPT_BOOLEAN(0, "branch-history", &branch_call_mode,
+		    "add last branch records to call history"),
 	OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
 		    "Show raw trace event output (do not use print fmt or plugins)"),
 	OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
@@ -1677,6 +1680,20 @@  int cmd_top(int argc, const char **argv)
 		goto out_delete_evlist;
 	}
 
+	if (branch_call_mode) {
+		if (!opts->branch_stack)
+			opts->branch_stack = PERF_SAMPLE_BRANCH_ANY;
+		symbol_conf.use_callchain = true;
+		callchain_param.key = CCKEY_ADDRESS;
+		callchain_param.branch_callstack = true;
+		callchain_param.enabled = true;
+		if (callchain_param.record_mode == CALLCHAIN_NONE)
+			callchain_param.record_mode = CALLCHAIN_FP;
+		callchain_register_param(&callchain_param);
+		if (!sort_order)
+			sort_order = "srcline,symbol,dso";
+	}
+
 	if (opts->branch_stack && callchain_param.enabled)
 		symbol_conf.show_branchflag_count = true;