[RFC,v1,4/4] perf tool: Add HiSilicon PMCU data decoding support

Message ID 20230206065146.645505-5-zhanjie9@hisilicon.com
State New
Headers
Series HiSilicon Performance Monitor Control Unit |

Commit Message

Jie Zhan Feb. 6, 2023, 6:51 a.m. UTC
  Add support for dumping the raw trace of HiSilicon PMCU data using
'perf report' or 'perf script'.

Example usage:

 # perf report -D

Output will contain the raw PMCU data with notes, such as:

. ... HISI PMCU data: size 0x9630 bytes
. ... Header: size 0x30 bytes
.  00000000:  00 00 40 00 04 00 00 00 08 00 00 00 00 00 00 00
.  00000010:  80 01 00 00 01 00 00 00 04 00 00 00 10 00 00 00
.  00000020:  11 00 00 00 12 00 00 00 13 00 00 00 00 00 00 00
.  Auxtrace buffer max size: 0x400000
.  Number of PMU counters in parallel: 4
.  Number of monitored CPUs: 8
.  Compatible mode: no
.  Subsample size: 0x180
.  Number of subsamples per sample: 1
.  Number of events: 4
.  Event   0: 0x0010
.  Event   1: 0x0011
.  Event   2: 0x0012
.  Event   3: 0x0013
. ... Data: size 0x9600 bytes
.  Sample 1
.    Subsample 1
.    00000030:  00000000            PMCID0SR CPU 0
.    00000034:  00000000            PMCID0SR CPU 1
.    00000038:  00000000            PMCID0SR CPU 2
.    0000003c:  00000000            PMCID0SR CPU 3
.    00000040:  00000000            PMCID0SR CPU 4
.    00000044:  00000000            PMCID0SR CPU 5
.    00000048:  00000000            PMCID0SR CPU 6
.    0000004c:  00000000            PMCID0SR CPU 7
.    00000050:  000000ba            PMCID1SR CPU 0
.    00000054:  000056fe            PMCID1SR CPU 1
.    00000058:  00000000            PMCID1SR CPU 2
.    0000005c:  00000000            PMCID1SR CPU 3
.    00000060:  00000195            PMCID1SR CPU 4
.    00000064:  000056fc            PMCID1SR CPU 5
.    00000068:  00000000            PMCID1SR CPU 6
.    0000006c:  00000000            PMCID1SR CPU 7
.    00000070:  0000000000000000    Event 0010 CPU 0
.    00000078:  0000000000000000    Event 0010 CPU 1
.    00000080:  0000000000000000    Event 0010 CPU 2
.    00000088:  0000000000000000    Event 0010 CPU 3
.    00000090:  0000000000000000    Event 0010 CPU 4
.    00000098:  0000000000000001    Event 0010 CPU 5
.    000000a0:  0000000000000000    Event 0010 CPU 6
.    000000a8:  0000000000000000    Event 0010 CPU 7
.    000000b0:  0000000000000000    Event 0011 CPU 0
.    000000b8:  0000000000000000    Event 0011 CPU 1
.    000000c0:  0000000000000000    Event 0011 CPU 2
.    000000c8:  0000000000000000    Event 0011 CPU 3
.    000000d0:  000000000000d614    Event 0011 CPU 4
.    000000d8:  000000000000046b    Event 0011 CPU 5
.    000000e0:  0000000000000000    Event 0011 CPU 6
.    000000e8:  0000000000000000    Event 0011 CPU 7
.    000000f0:  0000000000000000    Event 0012 CPU 0
.    000000f8:  0000000000000000    Event 0012 CPU 1
.    00000100:  0000000000000000    Event 0012 CPU 2
.    00000108:  0000000000000000    Event 0012 CPU 3
.    00000110:  00000000000000f4    Event 0012 CPU 4
.    00000118:  0000000000000003    Event 0012 CPU 5
.    00000120:  0000000000000000    Event 0012 CPU 6
.    00000128:  0000000000000000    Event 0012 CPU 7
.    00000130:  0000000000000000    Event 0013 CPU 0
.    00000138:  0000000000000000    Event 0013 CPU 1
.    00000140:  0000000000000000    Event 0013 CPU 2
.    00000148:  0000000000000000    Event 0013 CPU 3
.    00000150:  00000000000000f4    Event 0013 CPU 4
.    00000158:  0000000000000004    Event 0013 CPU 5
.    00000160:  0000000000000000    Event 0013 CPU 6
.    00000168:  0000000000000000    Event 0013 CPU 7
.    00000170:  000000000000d614    Cycle count CPU 0
.    00000178:  000000000000d614    Cycle count CPU 1
.    00000180:  0000000000000000    Cycle count CPU 2
.    00000188:  0000000000000000    Cycle count CPU 3
.    00000190:  000000000000d614    Cycle count CPU 4
.    00000198:  000000000000d614    Cycle count CPU 5
.    000001a0:  0000000000000000    Cycle count CPU 6
.    000001a8:  0000000000000000    Cycle count CPU 7
(...more data follows)

Signed-off-by: Jie Zhan <zhanjie9@hisilicon.com>
---
 tools/perf/util/Build       |   1 +
 tools/perf/util/auxtrace.c  |   4 +
 tools/perf/util/hisi-pmcu.c | 305 ++++++++++++++++++++++++++++++++++++
 tools/perf/util/hisi-pmcu.h |   2 +
 4 files changed, 312 insertions(+)
 create mode 100644 tools/perf/util/hisi-pmcu.c
  

Patch

diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index e315ecaec323..e062a2c1b962 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -120,6 +120,7 @@  perf-$(CONFIG_AUXTRACE) += arm-spe.o
 perf-$(CONFIG_AUXTRACE) += arm-spe-decoder/
 perf-$(CONFIG_AUXTRACE) += hisi-ptt.o
 perf-$(CONFIG_AUXTRACE) += hisi-ptt-decoder/
+perf-$(CONFIG_AUXTRACE) += hisi-pmcu.o
 perf-$(CONFIG_AUXTRACE) += s390-cpumsf.o
 
 ifdef CONFIG_LIBOPENCSD
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 46ada5ec3f9a..ac19220d307e 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -53,6 +53,7 @@ 
 #include "intel-bts.h"
 #include "arm-spe.h"
 #include "hisi-ptt.h"
+#include "hisi-pmcu.h"
 #include "s390-cpumsf.h"
 #include "util/mmap.h"
 
@@ -1324,6 +1325,9 @@  int perf_event__process_auxtrace_info(struct perf_session *session,
 	case PERF_AUXTRACE_HISI_PTT:
 		err = hisi_ptt_process_auxtrace_info(event, session);
 		break;
+	case PERF_AUXTRACE_HISI_PMCU:
+		err = hisi_pmcu_process_auxtrace_info(event, session);
+		break;
 	case PERF_AUXTRACE_UNKNOWN:
 	default:
 		return -EINVAL;
diff --git a/tools/perf/util/hisi-pmcu.c b/tools/perf/util/hisi-pmcu.c
new file mode 100644
index 000000000000..7e0b41cd464d
--- /dev/null
+++ b/tools/perf/util/hisi-pmcu.c
@@ -0,0 +1,305 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * HiSilicon Performance Monitor Control Unit (PMCU) support
+ *
+ * Copyright (C) 2022 HiSilicon Limited
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <linux/math.h>
+#include <linux/types.h>
+#include <linux/zalloc.h>
+#include <perf/event.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "auxtrace.h"
+#include "color.h"
+#include "debug.h"
+#include "event.h"
+#include "evsel.h"
+#include "hisi-pmcu.h"
+#include "session.h"
+#include "tool.h"
+#include <internal/lib.h>
+
+/* The header is padded to this many bytes; also the row width of its hex dump. */
+#define HISI_PMCU_AUX_HEADER_ALIGN	0x10
+/* Divisor applied to the header's CPU count to obtain the number of clusters. */
+#define HISI_PMCU_NR_CPU_CLUSTER	8
+/* printf-style helper: writes the raw-dump text to stdout in blue. */
+#define dump_print(fmt, ...) \
+	color_fprintf(stdout, PERF_COLOR_BLUE, fmt, ##__VA_ARGS__)
+
+/*
+ * Indices of the fixed u32 words at the start of the auxtrace header.
+ * HISI_PMCU_HEADER_MAX is the number of fixed words; the per-event words
+ * follow them.  The order matches struct hisi_pmcu_aux_header_info.
+ */
+enum hisi_pmcu_auxtrace_header_index {
+	HISI_PMCU_HEADER_BUFFER_SIZE,
+	HISI_PMCU_HEADER_NR_PMU,
+	HISI_PMCU_HEADER_NR_CPU,
+	HISI_PMCU_HEADER_COMP_MODE,
+	HISI_PMCU_HEADER_SUBSAMPLE_SIZE,
+	HISI_PMCU_HEADER_NR_SUBSAMPLE_PER_SAMPLE,
+	HISI_PMCU_HEADER_NR_EVENT,
+	HISI_PMCU_HEADER_MAX
+};
+
+/*
+ * In-memory copy of the auxtrace header.  It is filled by a plain memcpy()
+ * from the raw data, so the field order and widths must match the raw
+ * layout exactly.
+ */
+struct hisi_pmcu_aux_header_info {
+	u32 buffer_size;		/* auxtrace buffer max size */
+	u32 nr_pmu;			/* number of PMU counters in parallel */
+	u32 nr_cpu;			/* number of monitored CPUs */
+	u32 comp_mode;			/* non-zero: compatible mode (no cycle counts dumped) */
+	u32 subsample_size;		/* subsample size in bytes */
+	u32 nr_subsample_per_sample;	/* number of subsamples per sample */
+	u32 nr_event;			/* number of entries in events[] */
+	u32 events[];			/* event codes, nr_event entries */
+};
+
+/* Per-session decoder state; reached from session->auxtrace via container_of(). */
+struct hisi_pmcu_process {
+	u32 pmu_type;		/* taken from auxtrace_info priv[0]; matched against evsel attr.type */
+	struct auxtrace auxtrace;	/* callbacks registered with the session */
+	struct hisi_pmcu_aux_header_info *header;	/* heap copy of the last parsed header */
+};
+
+/* auxtrace .process_event callback: nothing to do, always returns 0. */
+static int hisi_pmcu_process_event(struct perf_session *session __maybe_unused,
+				   union perf_event *event __maybe_unused,
+				   struct perf_sample *sample __maybe_unused,
+				   struct perf_tool *tool __maybe_unused)
+{
+	return 0;
+}
+
+/*
+ * Parse and dump the header found at the start of PMCU auxtrace data.
+ *
+ * The header consists of HISI_PMCU_HEADER_MAX u32 words followed by one u32
+ * per event, padded up to a multiple of HISI_PMCU_AUX_HEADER_ALIGN bytes.
+ * A heap copy of the unpadded header is stored in pmcu->header, replacing
+ * (and freeing) the copy left by any previous call.
+ *
+ * @pmcu:   decoder state that receives the header copy
+ * @__data: start of the auxtrace data
+ * @size:   number of readable bytes at @__data
+ *
+ * Returns the padded header size in bytes on success, -EINVAL if @size is
+ * too small for the header it describes (padding included) or the header is
+ * too large to report, -ENOMEM if the copy cannot be allocated.
+ */
+static int hisi_pmcu_process_header(struct hisi_pmcu_process *pmcu,
+				    const unsigned char *__data, u64 size)
+{
+	struct hisi_pmcu_aux_header_info *header;
+	const u32 *data = (const u32 *) __data;
+	u64 read_size, dump_size;
+	unsigned int j;
+	u64 i;
+	u32 ev;
+
+	read_size = HISI_PMCU_HEADER_MAX * sizeof(*data);
+	if (size < read_size)
+		return -EINVAL;
+
+	/* Widen before adding so that a huge event count cannot wrap. */
+	read_size += (u64) data[HISI_PMCU_HEADER_NR_EVENT] * sizeof(*data);
+	if (size < read_size)
+		return -EINVAL;
+
+	/*
+	 * The hex dump below prints the padding bytes too, so they must be
+	 * present; the result must also fit the int return value.
+	 */
+	dump_size = round_up(read_size, HISI_PMCU_AUX_HEADER_ALIGN);
+	if (size < dump_size || dump_size > INT_MAX)
+		return -EINVAL;
+
+	header = malloc(read_size);
+	if (!header)
+		return -ENOMEM;
+	memcpy(header, data, read_size);
+
+	/* This runs once per auxtrace event: drop the previous copy. */
+	free(pmcu->header);
+	pmcu->header = header;
+
+	dump_print(". ... Header: size 0x%" PRIx64 " bytes\n", dump_size);
+	for (i = 0; i < dump_size; i += HISI_PMCU_AUX_HEADER_ALIGN) {
+		dump_print(".  %08" PRIx64 ":  ", i);
+		for (j = 0; j < HISI_PMCU_AUX_HEADER_ALIGN; j++)
+			dump_print("%02x ", __data[i + j]);
+		dump_print("\n");
+	}
+
+	dump_print(".  Auxtrace buffer max size: 0x%x\n", header->buffer_size);
+	dump_print(".  Number of PMU counters in parallel: %u\n", header->nr_pmu);
+	dump_print(".  Number of monitored CPUs: %u\n", header->nr_cpu);
+	dump_print(".  Compatible mode: %s\n", header->comp_mode ? "yes" : "no");
+	dump_print(".  Subsample size: 0x%x\n", header->subsample_size);
+	dump_print(".  Number of subsamples per sample: %u\n", header->nr_subsample_per_sample);
+	dump_print(".  Number of events: %u\n", header->nr_event);
+
+	for (ev = 0; ev < header->nr_event; ev++)
+		dump_print(".  Event %3u: 0x%04x\n", ev, header->events[ev]);
+
+	return dump_size;
+}
+
+/*
+ * Dump one subsample.
+ *
+ * Layout as consumed here, with nr_cluster = nr_cpu / HISI_PMCU_NR_CPU_CLUSTER
+ * and one entry per (core, cluster) pair in every bank:
+ *   - two banks of u32 PMCIDnSR values (n = 0, 1);
+ *   - nr_pmu banks of u64 event counts;
+ *   - unless comp_mode is set, one bank of u64 cycle counts.
+ * Within a bank the core index varies slowest and the cluster index fastest.
+ *
+ * @header:   parsed auxtrace header
+ * @data:     start of the subsample; the caller must guarantee that the
+ *            whole subsample is readable
+ * @offset:   offset of @data in the auxtrace data, used for display only
+ * @evoffset: index in header->events[] of the event held by the first bank
+ *
+ * Returns the number of bytes consumed.
+ */
+static int hisi_pmcu_dump_subsample(struct hisi_pmcu_aux_header_info *header,
+				    const unsigned char *data, u64 offset,
+				    u32 evoffset)
+{
+	int nr_cluster, core, cid, i;
+	u32 pos = 0, event;
+	u32 val32;
+	u64 val64;
+
+	nr_cluster = header->nr_cpu / HISI_PMCU_NR_CPU_CLUSTER;
+
+	/* Values are fetched with memcpy(): @data carries no alignment promise. */
+	for (cid = 0; cid < 2; cid++) {
+		for (core = 0; core < HISI_PMCU_NR_CPU_CLUSTER; core++) {
+			for (i = 0; i < nr_cluster; i++) {
+				memcpy(&val32, data + pos, sizeof(val32));
+				dump_print(".    %08" PRIx64 ":  %08x            PMCID%dSR CPU %d\n",
+					   offset + pos, val32,
+					   cid,
+					   core + i * HISI_PMCU_NR_CPU_CLUSTER);
+				pos += sizeof(u32);
+			}
+		}
+	}
+
+	for (event = 0; event < header->nr_pmu; event++) {
+		/*
+		 * A bank may have no matching entry in events[] (e.g. when
+		 * nr_event is not a multiple of nr_pmu); never index past
+		 * the nr_event entries that were copied from the header.
+		 */
+		u64 evidx = (u64) event + evoffset;
+
+		for (core = 0; core < HISI_PMCU_NR_CPU_CLUSTER; core++) {
+			for (i = 0; i < nr_cluster; i++) {
+				memcpy(&val64, data + pos, sizeof(val64));
+				if (evidx < header->nr_event)
+					dump_print(".    %08" PRIx64 ":  %016" PRIx64 "    Event %04x CPU %d\n",
+						   offset + pos, val64,
+						   header->events[evidx],
+						   core + i * HISI_PMCU_NR_CPU_CLUSTER);
+				else
+					dump_print(".    %08" PRIx64 ":  %016" PRIx64 "    Event ---- CPU %d\n",
+						   offset + pos, val64,
+						   core + i * HISI_PMCU_NR_CPU_CLUSTER);
+				pos += sizeof(u64);
+			}
+		}
+	}
+
+	if (!header->comp_mode) {
+		for (core = 0; core < HISI_PMCU_NR_CPU_CLUSTER; core++) {
+			for (i = 0; i < nr_cluster; i++) {
+				memcpy(&val64, data + pos, sizeof(val64));
+				dump_print(".    %08" PRIx64 ":  %016" PRIx64 "    Cycle count CPU %d\n",
+					   offset + pos, val64,
+					   core + i * HISI_PMCU_NR_CPU_CLUSTER);
+				pos += sizeof(u64);
+			}
+		}
+	}
+
+	return pos;
+}
+
+/*
+ * Dump one sample, i.e. header->nr_subsample_per_sample consecutive
+ * subsamples starting at @data.  Subsample k (0-based) is labelled k + 1 and
+ * uses the events starting at index k * header->nr_pmu.  @offset is the
+ * position of @data in the auxtrace data and is only used for display.
+ *
+ * Returns the number of bytes consumed.
+ */
+static int hisi_pmcu_dump_sample(struct hisi_pmcu_aux_header_info *header,
+				 const unsigned char *data, u64 offset)
+{
+	u32 consumed = 0;
+	u32 idx;
+
+	for (idx = 0; idx < header->nr_subsample_per_sample; idx++) {
+		dump_print(".    Subsample %d\n", idx + 1);
+		consumed += hisi_pmcu_dump_subsample(header, data + consumed,
+						     offset + consumed,
+						     idx * header->nr_pmu);
+	}
+
+	return consumed;
+}
+
+/*
+ * Number of bytes hisi_pmcu_dump_sample() walks for one sample, derived
+ * from the header with the same layout rules as hisi_pmcu_dump_subsample().
+ *
+ * Returns 0 if the header describes an empty sample, and the maximum u64
+ * value if the sample would be larger than @limit bytes.
+ */
+static u64 hisi_pmcu_sample_span(const struct hisi_pmcu_aux_header_info *header,
+				 u64 limit)
+{
+	u64 nr_slot, per_slot, span;
+
+	/* One slot per (core, cluster) pair in every bank. */
+	nr_slot = (u64) (header->nr_cpu / HISI_PMCU_NR_CPU_CLUSTER) *
+		  HISI_PMCU_NR_CPU_CLUSTER;
+	/* Two u32 PMCIDnSR values, nr_pmu u64 counts, optional cycle count. */
+	per_slot = 2 * sizeof(u32) + (u64) header->nr_pmu * sizeof(u64);
+	if (!header->comp_mode)
+		per_slot += sizeof(u64);
+
+	if (!nr_slot || !header->nr_subsample_per_sample)
+		return 0;
+
+	/* Divide instead of multiplying so the checks cannot overflow. */
+	if (per_slot > limit / nr_slot)
+		return ~(u64) 0;
+	span = per_slot * nr_slot;
+	if (span > limit / header->nr_subsample_per_sample)
+		return ~(u64) 0;
+
+	return span * header->nr_subsample_per_sample;
+}
+
+/*
+ * Dump a whole auxtrace data block: the header first, then every sample
+ * that follows it.
+ *
+ * @pmcu: decoder state; its header copy is refreshed from @data
+ * @data: start of the auxtrace data
+ * @size: number of readable bytes at @data
+ *
+ * Returns 0 on success (a trailing partial sample is reported and skipped),
+ * a negative error from header parsing, or -EINVAL if the header describes a
+ * layout that cannot be walked (zero buffer size or empty samples).
+ */
+static int hisi_pmcu_dump_data(struct hisi_pmcu_process *pmcu,
+			       const unsigned char *data, u64 size)
+{
+	struct hisi_pmcu_aux_header_info *header;
+	u64 sample_size, span;
+	u32 nr_sample = 1;
+	u64 pos;
+	int ret;
+
+	dump_print(". ... HISI PMCU data: size 0x%" PRIx64 " bytes\n", size);
+
+	ret = hisi_pmcu_process_header(pmcu, data, size);
+	if (ret < 0)
+		return ret;
+
+	/* Guard the subtraction below against a header that overruns @size. */
+	pos = ret;
+	if (pos > size)
+		return -EINVAL;
+
+	header = pmcu->header;
+	dump_print(". ... Data: size 0x%" PRIx64 " bytes\n", size - pos);
+	if (pos == size)
+		return 0;
+
+	/* buffer_size is a modulus below, and an empty sample never advances. */
+	span = hisi_pmcu_sample_span(header, size - pos);
+	if (!header->buffer_size || !span)
+		return -EINVAL;
+
+	sample_size = (u64) header->subsample_size *
+		      header->nr_subsample_per_sample;
+
+	while (pos < size) {
+		u64 buf_remain;
+
+		/* Never let the sample dumper read past the end of @data. */
+		if (size - pos < span) {
+			dump_print(".  Truncated sample: 0x%" PRIx64 " bytes left\n",
+				   size - pos);
+			break;
+		}
+
+		dump_print(".  Sample %u\n", nr_sample);
+		ret = hisi_pmcu_dump_sample(header, data + pos, pos);
+		if (ret <= 0 || (u64) ret != span)
+			return -EINVAL;
+		pos += span;
+		nr_sample++;
+
+		/* Skip gap at the end of an auxtrace buffer */
+		buf_remain = header->buffer_size - pos % header->buffer_size;
+		if (buf_remain < sample_size)
+			pos += buf_remain;
+	}
+
+	return 0;
+}
+
+/*
+ * auxtrace .process_auxtrace_event callback.
+ *
+ * Only acts when raw dumping (dump_trace) is enabled: reads the
+ * event->auxtrace.size bytes of payload from the session's data file into a
+ * temporary buffer, dumps them and releases the buffer again.
+ *
+ * Returns 0 when there is nothing to do or the dump succeeded, -ENOMEM if
+ * the buffer cannot be allocated, -errno on a read error, -EIO on a short
+ * read, or the error returned by hisi_pmcu_dump_data().
+ */
+static int hisi_pmcu_process_auxtrace_event(struct perf_session *session,
+					    union perf_event *event,
+					    struct perf_tool *tool __maybe_unused)
+{
+	struct hisi_pmcu_process *pmcu_process;
+	ssize_t nread;
+	void *data;
+	u64 size;
+	int err;
+	int fd;
+
+	if (!dump_trace)
+		return 0;
+
+	size = event->auxtrace.size;
+	if (!size)
+		return 0;
+
+	data = malloc(size);
+	if (!data)
+		return -ENOMEM;
+
+	fd = perf_data__fd(session->data);
+
+	nread = readn(fd, data, size);
+	if (nread < 0) {
+		/* Capture errno before free() gets a chance to change it. */
+		err = -errno;
+		goto out_free;
+	}
+	if ((u64) nread != size) {
+		/* Incomplete payload: do not dump uninitialised memory. */
+		err = -EIO;
+		goto out_free;
+	}
+
+	pmcu_process = container_of(session->auxtrace,
+				    struct hisi_pmcu_process, auxtrace);
+
+	err = hisi_pmcu_dump_data(pmcu_process, data, size);
+
+out_free:
+	/* The buffer is only needed for the dump; release it on every path. */
+	free(data);
+	return err;
+}
+
+/* auxtrace .flush_events callback: nothing is queued, always returns 0. */
+static int hisi_pmcu_flush_events(struct perf_session *session __maybe_unused,
+				  struct perf_tool *tool __maybe_unused)
+{
+	return 0;
+}
+
+/* auxtrace .free_events callback: intentionally empty. */
+static void hisi_pmcu_free_events(struct perf_session *session __maybe_unused)
+{
+}
+
+/*
+ * auxtrace .free callback: detach the decoder from the session and release
+ * it together with the header copy it owns.
+ */
+static void hisi_pmcu_free(struct perf_session *session)
+{
+	struct hisi_pmcu_process *pmcu_process;
+
+	pmcu_process = container_of(session->auxtrace,
+				    struct hisi_pmcu_process, auxtrace);
+
+	session->auxtrace = NULL;
+	/* header is malloc()ed while dumping; NULL if no data was ever dumped. */
+	free(pmcu_process->header);
+	free(pmcu_process);
+}
+
+/*
+ * auxtrace .evsel_is_auxtrace callback: true when @evsel's attr.type equals
+ * the PMU type recorded for this session's PMCU decoder.
+ */
+static bool hisi_pmcu_evsel_is_auxtrace(struct perf_session *session,
+					struct evsel *evsel)
+{
+	struct hisi_pmcu_process *pmcu =
+		container_of(session->auxtrace,
+			     struct hisi_pmcu_process, auxtrace);
+
+	return evsel->core.attr.type == pmcu->pmu_type;
+}
+
+/*
+ * Handle the auxtrace info event for HiSilicon PMCU data: allocate the
+ * decoder state, record the PMU type from priv[0] and register the
+ * callbacks with @session.
+ *
+ * Returns 0 on success, -EINVAL if the event is too small to carry the
+ * private data, -ENOMEM if the decoder state cannot be allocated.
+ */
+int hisi_pmcu_process_auxtrace_info(union perf_event *event,
+				    struct perf_session *session)
+{
+	struct perf_record_auxtrace_info *info = &event->auxtrace_info;
+	struct hisi_pmcu_process *pmcu;
+
+	if (info->header.size < sizeof(*info) +
+				HISI_PMCU_AUXTRACE_PRIV_SIZE)
+		return -EINVAL;
+
+	pmcu = zalloc(sizeof(*pmcu));
+	if (!pmcu)
+		return -ENOMEM;
+
+	pmcu->pmu_type = info->priv[0];
+
+	/* zalloc() left every other callback NULL. */
+	pmcu->auxtrace.process_event = hisi_pmcu_process_event;
+	pmcu->auxtrace.process_auxtrace_event = hisi_pmcu_process_auxtrace_event;
+	pmcu->auxtrace.flush_events = hisi_pmcu_flush_events;
+	pmcu->auxtrace.free_events = hisi_pmcu_free_events;
+	pmcu->auxtrace.free = hisi_pmcu_free;
+	pmcu->auxtrace.evsel_is_auxtrace = hisi_pmcu_evsel_is_auxtrace;
+
+	session->auxtrace = &pmcu->auxtrace;
+
+	return 0;
+}
diff --git a/tools/perf/util/hisi-pmcu.h b/tools/perf/util/hisi-pmcu.h
index d46d523a3aee..8df74695164b 100644
--- a/tools/perf/util/hisi-pmcu.h
+++ b/tools/perf/util/hisi-pmcu.h
@@ -14,4 +14,6 @@ 
 struct auxtrace_record *hisi_pmcu_recording_init(int *err,
 					struct perf_pmu *hisi_pmcu_pmu);
 
+/*
+ * Register the HiSilicon PMCU decoder with @session from an auxtrace info
+ * event.  Returns 0 on success, -EINVAL or -ENOMEM on failure.
+ */
+int hisi_pmcu_process_auxtrace_info(union perf_event *event,
+				    struct perf_session *session);
 #endif