[V10,07/10] arm64/perf: Add PERF_ATTACH_TASK_DATA to events with has_branch_stack()
Commit Message
Short running processes i.e those getting very small cpu run time each time
when they get scheduled on, might not accumulate much branch records before
a PMU IRQ really happens. This increases possibility, for such processes to
loose much of its branch records, while being scheduled in-out of various
cpus on the system.
There is a need to save all occurred branch records during the cpu run time
while the process gets scheduled out. It requires an event context specific
buffer for such storage.
This adds PERF_ATTACH_TASK_DATA flag unconditionally, for all branch stack
sampling events, which would allocate task_ctx_data during its event init.
This also creates a platform specific task_ctx_data kmem cache which will
serve such allocation requests.
This adds a new structure 'arm64_perf_task_context' which encapsulates brbe
register set for maximum possible BRBE entries on the HW along with a valid
records tracking element.
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
---
drivers/perf/arm_brbe.c | 13 +++++++++++++
drivers/perf/arm_brbe.h | 13 +++++++++++++
drivers/perf/arm_pmuv3.c | 8 ++++++--
3 files changed, 32 insertions(+), 2 deletions(-)
@@ -109,20 +109,33 @@ bool armv8pmu_branch_valid(struct perf_event *event)
return true;
}
+static inline struct kmem_cache *
+arm64_create_brbe_task_ctx_kmem_cache(size_t size)
+{
+ return kmem_cache_create("arm64_brbe_task_ctx", size, 0, 0, NULL);
+}
+
int armv8pmu_private_alloc(struct arm_pmu *arm_pmu)
{
struct brbe_hw_attr *brbe_attr = kzalloc(sizeof(struct brbe_hw_attr), GFP_KERNEL);
+ size_t size = sizeof(struct arm64_perf_task_context);
if (!brbe_attr)
return -ENOMEM;
arm_pmu->private = brbe_attr;
+ arm_pmu->pmu.task_ctx_cache = arm64_create_brbe_task_ctx_kmem_cache(size);
+ if (!arm_pmu->pmu.task_ctx_cache) {
+ kfree(arm_pmu->private);
+ return -ENOMEM;
+ }
return 0;
}
void armv8pmu_private_free(struct arm_pmu *arm_pmu)
{
kfree(arm_pmu->private);
+ kmem_cache_destroy(arm_pmu->pmu.task_ctx_cache);
}
static int brbe_attributes_probe(struct arm_pmu *armpmu, u32 brbe)
@@ -80,12 +80,25 @@
* --------------------------------- ------
*/
#define BRBE_BANK_MAX_ENTRIES 32
+#define BRBE_MAX_BANK 2
+#define BRBE_MAX_ENTRIES (BRBE_BANK_MAX_ENTRIES * BRBE_MAX_BANK)
#define BRBE_BANK0_IDX_MIN 0
#define BRBE_BANK0_IDX_MAX 31
#define BRBE_BANK1_IDX_MIN 32
#define BRBE_BANK1_IDX_MAX 63
+struct brbe_regset {
+ unsigned long brbsrc;
+ unsigned long brbtgt;
+ unsigned long brbinf;
+};
+
+struct arm64_perf_task_context {
+ struct brbe_regset store[BRBE_MAX_ENTRIES];
+ int nr_brbe_records;
+};
+
struct brbe_hw_attr {
int brbe_version;
int brbe_cc;
@@ -1022,8 +1022,12 @@ static int __armv8_pmuv3_map_event(struct perf_event *event,
hw_event_id = __armv8_pmuv3_map_event_id(armpmu, event);
- if (has_branch_stack(event) && !armv8pmu_branch_valid(event))
- return -EOPNOTSUPP;
+ if (has_branch_stack(event)) {
+ if (!armv8pmu_branch_valid(event))
+ return -EOPNOTSUPP;
+
+ event->attach_state |= PERF_ATTACH_TASK_DATA;
+ }
/*
* CHAIN events only work when paired with an adjacent counter, and it