diff mbox series

[v2] perf: arm_cspmu: Separate Arm and vendor module

Message ID	20230418062030.45620-1-bwicaksono@nvidia.com
State	New
Headers	Received-SPF: pass (google.com: domain of linux-kernel-owner@vger.kernel.org designates 2620:137:e000::1:20 as permitted sender) client-ip=2620:137:e000::1:20; Received-SPF: Pass (protection.outlook.com: domain of nvidia.com designates 216.228.117.161 as permitted sender) receiver=protection.outlook.com; client-ip=216.228.117.161; helo=mail.nvidia.com; pr=C From: Besar Wicaksono <bwicaksono@nvidia.com> To: <suzuki.poulose@arm.com>, <catalin.marinas@arm.com>, <will@kernel.org>, <mark.rutland@arm.com> CC: <linux-arm-kernel@lists.infradead.org>, <linux-kernel@vger.kernel.org>, <linux-tegra@vger.kernel.org>, <treding@nvidia.com>, <jonathanh@nvidia.com>, <vsethi@nvidia.com>, <rwiley@nvidia.com>, <efunsten@nvidia.com>, "Besar Wicaksono" <bwicaksono@nvidia.com> Subject: [PATCH v2] perf: arm_cspmu: Separate Arm and vendor module Date: Tue, 18 Apr 2023 01:20:30 -0500 Message-ID: <20230418062030.45620-1-bwicaksono@nvidia.com> MIME-Version: 1.0 Content-Type: text/plain Precedence: bulk
Series	[v2] perf: arm_cspmu: Separate Arm and vendor module \| [v2] perf: arm_cspmu: Separate Arm and vendor module

Commit Message

Besar Wicaksono April 18, 2023, 6:20 a.m. UTC

  Arm Coresight PMU driver consists of main standard code and vendor
backend code. Both are currently built as a single module.
This patch adds vendor registration API to separate the two to
keep things modular. Vendor module shall register to the main
module on loading and trigger device reprobe.

Signed-off-by: Besar Wicaksono <bwicaksono@nvidia.com>
---

Changes from v1:
 * Added separate Kconfig entry for nvidia backend
 * Added lock to protect accesses to the lists
 * Added support for matching subset devices from a vendor
 * Added state tracking to avoid reprobe when a device is in use
v1: ttps://lore.kernel.org/linux-arm-kernel/20230403163905.20354-1-bwicaksono@nvidia.com/T/#u

---
 drivers/perf/arm_cspmu/Kconfig        |   9 +-
 drivers/perf/arm_cspmu/Makefile       |   6 +-
 drivers/perf/arm_cspmu/arm_cspmu.c    | 280 +++++++++++++++++++++++---
 drivers/perf/arm_cspmu/arm_cspmu.h    |  32 ++-
 drivers/perf/arm_cspmu/nvidia_cspmu.c |  39 +++-
 drivers/perf/arm_cspmu/nvidia_cspmu.h |  17 --
 6 files changed, 325 insertions(+), 58 deletions(-)
 delete mode 100644 drivers/perf/arm_cspmu/nvidia_cspmu.h


base-commit: 73f2c2a7e1d2b31fdd5faa6dfa151c437a6c0a5a
prerequisite-patch-id: fb691dc01d87597bcbaa4d352073304287c20f73

Comments

Suzuki K Poulose April 18, 2023, 11:07 a.m. UTC | #1

On 18/04/2023 07:20, Besar Wicaksono wrote:
> Arm Coresight PMU driver consists of main standard code and vendor
> backend code. Both are currently built as a single module.
> This patch adds vendor registration API to separate the two to
> keep things modular. Vendor module shall register to the main
> module on loading and trigger device reprobe.
> 
> Signed-off-by: Besar Wicaksono <bwicaksono@nvidia.com>
> ---
> 
> Changes from v1:
>   * Added separate Kconfig entry for nvidia backend
>   * Added lock to protect accesses to the lists
>   * Added support for matching subset devices from a vendor
>   * Added state tracking to avoid reprobe when a device is in use
> v1: ttps://lore.kernel.org/linux-arm-kernel/20230403163905.20354-1-bwicaksono@nvidia.com/T/#u
> 
> ---
>   drivers/perf/arm_cspmu/Kconfig        |   9 +-
>   drivers/perf/arm_cspmu/Makefile       |   6 +-
>   drivers/perf/arm_cspmu/arm_cspmu.c    | 280 +++++++++++++++++++++++---
>   drivers/perf/arm_cspmu/arm_cspmu.h    |  32 ++-
>   drivers/perf/arm_cspmu/nvidia_cspmu.c |  39 +++-
>   drivers/perf/arm_cspmu/nvidia_cspmu.h |  17 --
>   6 files changed, 325 insertions(+), 58 deletions(-)
>   delete mode 100644 drivers/perf/arm_cspmu/nvidia_cspmu.h
> 
> diff --git a/drivers/perf/arm_cspmu/Kconfig b/drivers/perf/arm_cspmu/Kconfig
> index 0b316fe69a45..8ce7b45a0075 100644
> --- a/drivers/perf/arm_cspmu/Kconfig
> +++ b/drivers/perf/arm_cspmu/Kconfig
> @@ -1,6 +1,6 @@
>   # SPDX-License-Identifier: GPL-2.0
>   #
> -# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
> +# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
>   
>   config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
>   	tristate "ARM Coresight Architecture PMU"
> @@ -11,3 +11,10 @@ config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
>   	  based on ARM CoreSight PMU architecture. Note that this PMU
>   	  architecture does not have relationship with the ARM CoreSight
>   	  Self-Hosted Tracing.
> +
> +config NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU
> +	tristate "NVIDIA Coresight Architecture PMU"
> +	depends on ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
> +	help
> +	  Provides NVIDIA specific attributes for performance monitoring unit
> +	  (PMU) devices based on ARM CoreSight PMU architecture.
> diff --git a/drivers/perf/arm_cspmu/Makefile b/drivers/perf/arm_cspmu/Makefile
> index fedb17df982d..f8ae22411d59 100644
> --- a/drivers/perf/arm_cspmu/Makefile
> +++ b/drivers/perf/arm_cspmu/Makefile
> @@ -1,6 +1,6 @@
> -# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
> +# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
>   #
>   # SPDX-License-Identifier: GPL-2.0
>   
> -obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu_module.o
> -arm_cspmu_module-y := arm_cspmu.o nvidia_cspmu.o
> +obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu.o
> +obj-$(CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += nvidia_cspmu.o
> diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c
> index e31302ab7e37..c55ea2b74454 100644
> --- a/drivers/perf/arm_cspmu/arm_cspmu.c
> +++ b/drivers/perf/arm_cspmu/arm_cspmu.c
> @@ -16,7 +16,7 @@
>    * The user should refer to the vendor technical documentation to get details
>    * about the supported events.
>    *
> - * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
> + * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
>    *
>    */
>   
> @@ -25,13 +25,14 @@
>   #include <linux/ctype.h>
>   #include <linux/interrupt.h>
>   #include <linux/io-64-nonatomic-lo-hi.h>
> +#include <linux/list.h>
>   #include <linux/module.h>
> +#include <linux/mutex.h>
>   #include <linux/perf_event.h>
>   #include <linux/platform_device.h>
>   #include <acpi/processor.h>
>   
>   #include "arm_cspmu.h"
> -#include "nvidia_cspmu.h"
>   
>   #define PMUNAME "arm_cspmu"
>   #define DRVNAME "arm-cs-arch-pmu"
> @@ -117,11 +118,52 @@
>    */
>   #define HILOHI_MAX_POLL	1000
>   
> -/* JEDEC-assigned JEP106 identification code */
> -#define ARM_CSPMU_IMPL_ID_NVIDIA		0x36B
> -
>   static unsigned long arm_cspmu_cpuhp_state;
>   
> +/* List of Coresight PMU instances in the system. */
> +static LIST_HEAD(arm_cspmus);
> +
> +/* List of registered vendor backends. */
> +static LIST_HEAD(arm_cspmu_impls);
> +
> +static DEFINE_MUTEX(arm_cspmu_lock);
> +
> +/*
> + * State of the generic driver.
> + * 0 => registering backend.
> + * 1 => ready to use.
> + * 2 or more => in use.
> + */
> +#define ARM_CSPMU_STATE_REG	0
> +#define ARM_CSPMU_STATE_READY	1
> +static atomic_t arm_cspmu_state;
> +
> +static void arm_cspmu_state_ready(void)
> +{
> +	atomic_set(&arm_cspmu_state, ARM_CSPMU_STATE_READY);
> +}
> +
> +static bool try_arm_cspmu_state_reg(void)
> +{
> +	const int old = ARM_CSPMU_STATE_READY;
> +	const int new = ARM_CSPMU_STATE_REG;
> +
> +	return atomic_cmpxchg(&arm_cspmu_state, old, new) == old;
> +}
> +
> +static bool try_arm_cspmu_state_get(void)
> +{
> +	return atomic_inc_not_zero(&arm_cspmu_state);
> +}
> +
> +static void arm_cspmu_state_put(void)
> +{
> +	int ret;
> +
> +	ret = atomic_dec_if_positive(&arm_cspmu_state);
> +	WARN_ON(ret < 0);
> +}
> +

As long as the vendor module is set for the PMU instance, it won't be 
unloaded as long as there are any perf events and thus the specific
driver cannot be unloaded. So, you don't need explicit refcount
maintenance for each pmu callbacks.

>   /*
>    * In CoreSight PMU architecture, all of the MMIO registers are 32-bit except
>    * counter register. The counter register can be implemented as 32-bit or 64-bit
> @@ -380,26 +422,161 @@ static struct attribute_group arm_cspmu_cpumask_attr_group = {
>   };
>   
>   struct impl_match {
> -	u32 pmiidr;
> -	u32 mask;
> -	int (*impl_init_ops)(struct arm_cspmu *cspmu);
> +	struct list_head next;
> +	struct arm_cspmu_impl_param param;
>   };
>   
> -static const struct impl_match impl_match[] = {
> -	{
> -	  .pmiidr = ARM_CSPMU_IMPL_ID_NVIDIA,
> -	  .mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
> -	  .impl_init_ops = nv_cspmu_init_ops
> -	},
> -	{}
> -};
> +static struct arm_cspmu_impl_param to_impl_param(const struct arm_cspmu *cspmu)
> +{
> +	struct arm_cspmu_impl_param ret = {0};
> +	u32 pmiidr = cspmu->impl.pmiidr;
> +
> +	ret.impl_id = FIELD_GET(ARM_CSPMU_PMIIDR_IMPLEMENTER, pmiidr);
> +	ret.pvr = FIELD_GET(ARM_CSPMU_PMIIDR_PVR, pmiidr);
> +	ret.pvr_mask = GENMASK(31, 0);
> +
> +	return ret;
> +}
> +
> +static bool impl_param_match(const struct arm_cspmu_impl_param *A,
> +			     const struct arm_cspmu_impl_param *B)
> +{
> +	/*
> +	 * Match criteria:
> +	 * - Implementer id should match.
> +	 * - A's device id is within B's range, or vice versa. This allows
> +	 *   vendor to register backend for a range of devices.
> +	 */
> +	if ((A->impl_id == B->impl_id) &&
> +	    (((A->pvr & A->pvr_mask) == (B->pvr & A->pvr_mask)) ||
> +	     ((A->pvr & B->pvr_mask) == (B->pvr & B->pvr_mask))))
> +		return true;
> +

nit: Please do not use CAPITAL letters for variable names. Could this
simply accept a pmiidr and a impl_match and match the fields with that
of the mask/value pair. See more below.


> +	return false;
> +}
> +
> +static struct impl_match *impl_match_find(
> +	const struct arm_cspmu_impl_param *impl_param)
> +{
> +	struct impl_match *impl_match;
> +
> +	list_for_each_entry(impl_match, &arm_cspmu_impls, next) {
> +		if (impl_param_match(impl_param, &impl_match->param))
> +			return impl_match;
> +	}
> +
> +	return NULL;
> +}
> +
> +static int arm_cspmu_impl_reprobe(
> +	const struct arm_cspmu_impl_param *impl_param)
> +{
> +	struct arm_cspmu *cspmu, *temp;
> +	LIST_HEAD(reprobe_list);
> +	int ret = 0;
> +
> +	mutex_lock(&arm_cspmu_lock);
> +
> +	/* Move the matching devices to temp list to avoid recursive lock. */
> +	list_for_each_entry_safe(cspmu, temp, &arm_cspmus, next) {
> +		struct arm_cspmu_impl_param match_param = to_impl_param(cspmu);

Also, does this work if the pvr and pvr_mask were provided by the
backend driver ? to_impl_param() takes the pmiidr which is either
read from the device or from the ACPI table, unfiltered. Could we
not change impl_param_match() to :
		impl_param_match(cspmu->impl.pmiidr, impl_param) ?

> +
> +		if (impl_param_match(impl_param, &match_param))
> +			list_move(&cspmu->next, &reprobe_list);
> +	}
> +
> +	mutex_unlock(&arm_cspmu_lock);
> +
> +	/* Reprobe the devices. */
> +	list_for_each_entry_safe(cspmu, temp, &reprobe_list, next) {
> +		ret = device_reprobe(cspmu->dev);
> +		if (ret) {
> +			pr_err("arm_cspmu fail reprobe err: %d\n", ret);
> +			return ret;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +int arm_cspmu_impl_register(const struct arm_cspmu_impl_param *impl_param)
> +{
> +	struct impl_match *match;
> +	int ret = 0;
> +
> +	if (!try_arm_cspmu_state_reg()) {
> +		pr_err("arm_cspmu reg failed, device(s) is in use\n");
> +		return -EBUSY;
> +	}
> +
> +	mutex_lock(&arm_cspmu_lock);
> +
> +	match = impl_match_find(impl_param);
> +	if (match) {
> +		pr_err("arm_cspmu reg failed, impl: 0x%x, pvr: 0x%x, pvr_mask: 0x%x already exists\n",
> +			match->param.impl_id, match->param.pvr,
> +			match->param.pvr_mask);
> +		mutex_unlock(&arm_cspmu_lock);
> +		arm_cspmu_state_ready();
> +		return -EINVAL;
> +	}
> +
> +	match = kzalloc(sizeof(struct impl_match), GFP_KERNEL);
> +	if (!match) {
> +		mutex_unlock(&arm_cspmu_lock);
> +		arm_cspmu_state_ready();
> +		return -ENOMEM;
> +	}
> +
> +	memcpy(&match->param, impl_param, sizeof(match->param));

nit: 	match->param = *impl_param; ?

> +	list_add(&match->next, &arm_cspmu_impls);
> +
> +	mutex_unlock(&arm_cspmu_lock);
> +
> +	/* Replace generic backend with vendor implementation. */
> +	ret = arm_cspmu_impl_reprobe(impl_param);
> +
> +	if (ret)
> +		arm_cspmu_impl_unregister(impl_param);
> +
> +	arm_cspmu_state_ready();
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(arm_cspmu_impl_register);
> +
> +void arm_cspmu_impl_unregister(const struct arm_cspmu_impl_param *impl_param)
> +{
> +	struct impl_match *match;
> +
> +	mutex_lock(&arm_cspmu_lock);
> +
> +	match = impl_match_find(impl_param);
> +	if (!match) {
> +		pr_err("arm_cspmu unreg failed, unable to find impl: 0x%x, pvr: 0x%x, pvr_mask: 0x%x\n",
> +			impl_param->impl_id, impl_param->pvr,
> +			impl_param->pvr_mask);
> +		mutex_unlock(&arm_cspmu_lock);
> +		return;
> +	}
> +
> +	list_del(&match->next);
> +	kfree(match);
> +
> +	mutex_unlock(&arm_cspmu_lock);
> +
> +	/* Re-attach devices to standard driver. */
> +	arm_cspmu_impl_reprobe(impl_param);
> +}
> +EXPORT_SYMBOL_GPL(arm_cspmu_impl_unregister);
>   
>   static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
>   {
> -	int ret;
> +	int ret = 0;
>   	struct acpi_apmt_node *apmt_node = cspmu->apmt_node;
>   	struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
> -	const struct impl_match *match = impl_match;
> +	struct arm_cspmu_impl_param match_param = {0};
> +	const struct impl_match *match;
>   
>   	/*
>   	 * Get PMU implementer and product id from APMT node.
> @@ -410,19 +587,23 @@ static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
>   		(apmt_node->impl_id) ? apmt_node->impl_id :
>   				       readl(cspmu->base0 + PMIIDR);
>   
> -	/* Find implementer specific attribute ops. */
> -	for (; match->pmiidr; match++) {
> -		const u32 mask = match->mask;
> +	cspmu->impl.module = THIS_MODULE;
>   
> -		if ((match->pmiidr & mask) == (cspmu->impl.pmiidr & mask)) {
> -			ret = match->impl_init_ops(cspmu);
> -			if (ret)
> -				return ret;
> +	mutex_lock(&arm_cspmu_lock);
>   
> -			break;
> -		}
> +	/* Find implementer specific attribute ops. */
> +	match_param = to_impl_param(cspmu);
> +	match = impl_match_find(&match_param);
> +	if (match) {
> +		cspmu->impl.module = match->param.module;
> +		ret = match->param.impl_init_ops(cspmu);
>   	}
>   
> +	mutex_unlock(&arm_cspmu_lock);
> +
> +	if (ret)
> +		return ret;
> +
>   	/* Use default callbacks if implementer doesn't provide one. */
>   	CHECK_DEFAULT_IMPL_OPS(impl_ops, get_event_attrs);
>   	CHECK_DEFAULT_IMPL_OPS(impl_ops, get_format_attrs);
> @@ -639,6 +820,11 @@ static int arm_cspmu_event_init(struct perf_event *event)
>   	struct arm_cspmu *cspmu;
>   	struct hw_perf_event *hwc = &event->hw;
>   
> +	if (!try_arm_cspmu_state_get()) {
> +		pr_err("arm_cspmu event_init fail: driver is reprobing\n");
> +		return -EBUSY;
> +	}
> +
>   	cspmu = to_arm_cspmu(event->pmu);
>   
>   	/*
> @@ -648,12 +834,14 @@ static int arm_cspmu_event_init(struct perf_event *event)
>   	if (is_sampling_event(event)) {
>   		dev_dbg(cspmu->pmu.dev,
>   			"Can't support sampling events\n");
> +		arm_cspmu_state_put();
>   		return -EOPNOTSUPP;
>   	}
>   
>   	if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK) {
>   		dev_dbg(cspmu->pmu.dev,
>   			"Can't support per-task counters\n");
> +		arm_cspmu_state_put();
>   		return -EINVAL;
>   	}
>   
> @@ -664,16 +852,21 @@ static int arm_cspmu_event_init(struct perf_event *event)
>   	if (!cpumask_test_cpu(event->cpu, &cspmu->associated_cpus)) {
>   		dev_dbg(cspmu->pmu.dev,
>   			"Requested cpu is not associated with the PMU\n");
> +		arm_cspmu_state_put();
>   		return -EINVAL;
>   	}
>   
>   	/* Enforce the current active CPU to handle the events in this PMU. */
>   	event->cpu = cpumask_first(&cspmu->active_cpu);
> -	if (event->cpu >= nr_cpu_ids)
> +	if (event->cpu >= nr_cpu_ids) {
> +		arm_cspmu_state_put();
>   		return -EINVAL;
> +	}
>   
> -	if (!arm_cspmu_validate_group(event))
> +	if (!arm_cspmu_validate_group(event)) {
> +		arm_cspmu_state_put();
>   		return -EINVAL;
> +	}
>   
>   	/*
>   	 * The logical counter id is tracked with hw_perf_event.extra_reg.idx.
> @@ -686,6 +879,8 @@ static int arm_cspmu_event_init(struct perf_event *event)
>   	hwc->extra_reg.idx = -1;
>   	hwc->config = cspmu->impl.ops.event_type(event);
>   
> +	arm_cspmu_state_put();
> +
>   	return 0;
>   }
>   
> @@ -864,13 +1059,22 @@ static int arm_cspmu_add(struct perf_event *event, int flags)
>   	struct hw_perf_event *hwc = &event->hw;
>   	int idx;
>   
> +	if (!try_arm_cspmu_state_get()) {
> +		pr_err("arm_cspmu event_init fail: driver is reprobing\n");
> +		return -EBUSY;
> +	}
> +
>   	if (WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(),
> -					   &cspmu->associated_cpus)))
> +					   &cspmu->associated_cpus))) {
> +		arm_cspmu_state_put();
>   		return -ENOENT;
> +	}
>   
>   	idx = arm_cspmu_get_event_idx(hw_events, event);
> -	if (idx < 0)
> +	if (idx < 0) {
> +		arm_cspmu_state_put();
>   		return idx;
> +	}
>   
>   	hw_events->events[idx] = event;
>   	hwc->idx = to_phys_idx(cspmu, idx);
> @@ -900,6 +1104,8 @@ static void arm_cspmu_del(struct perf_event *event, int flags)
>   	clear_bit(idx, hw_events->used_ctrs);
>   
>   	perf_event_update_userpage(event);
> +
> +	arm_cspmu_state_put();
>   }
>   
>   static void arm_cspmu_read(struct perf_event *event)
> @@ -1154,7 +1360,7 @@ static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu)
>   
>   	cspmu->pmu = (struct pmu){
>   		.task_ctx_nr	= perf_invalid_context,
> -		.module		= THIS_MODULE,
> +		.module		= cspmu->impl.module,
>   		.pmu_enable	= arm_cspmu_enable,
>   		.pmu_disable	= arm_cspmu_disable,
>   		.event_init	= arm_cspmu_event_init,
> @@ -1205,6 +1411,10 @@ static int arm_cspmu_device_probe(struct platform_device *pdev)
>   	if (ret)
>   		return ret;
>   
> +	mutex_lock(&arm_cspmu_lock);
> +	list_add(&cspmu->next, &arm_cspmus);
> +	mutex_unlock(&arm_cspmu_lock);
> +
>   	return 0;
>   }
>   
> @@ -1212,6 +1422,10 @@ static int arm_cspmu_device_remove(struct platform_device *pdev)
>   {
>   	struct arm_cspmu *cspmu = platform_get_drvdata(pdev);
>   
> +	mutex_lock(&arm_cspmu_lock);
> +	list_del(&cspmu->next);
> +	mutex_unlock(&arm_cspmu_lock);
> +
>   	perf_pmu_unregister(&cspmu->pmu);
>   	cpuhp_state_remove_instance(arm_cspmu_cpuhp_state, &cspmu->cpuhp_node);
>   
> @@ -1281,6 +1495,8 @@ static int __init arm_cspmu_init(void)
>   {
>   	int ret;
>   
> +	arm_cspmu_state_ready();
> +
>   	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
>   					"perf/arm/cspmu:online",
>   					arm_cspmu_cpu_online,
> diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h
> index 51323b175a4a..cf3458d9fc63 100644
> --- a/drivers/perf/arm_cspmu/arm_cspmu.h
> +++ b/drivers/perf/arm_cspmu/arm_cspmu.h
> @@ -1,7 +1,7 @@
>   /* SPDX-License-Identifier: GPL-2.0
>    *
>    * ARM CoreSight Architecture PMU driver.
> - * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
> + * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
>    *
>    */
>   
> @@ -68,7 +68,10 @@
>   
>   /* PMIIDR register field */
>   #define ARM_CSPMU_PMIIDR_IMPLEMENTER	GENMASK(11, 0)
> +#define ARM_CSPMU_PMIIDR_REVISION	GENMASK(15, 12)
> +#define ARM_CSPMU_PMIIDR_VARIANT	GENMASK(19, 16)
>   #define ARM_CSPMU_PMIIDR_PRODUCTID	GENMASK(31, 20)
> +#define ARM_CSPMU_PMIIDR_PVR		GENMASK(31, 12)
>   
>   struct arm_cspmu;
>   
> @@ -107,15 +110,36 @@ struct arm_cspmu_impl_ops {
>   					 struct attribute *attr, int unused);
>   };
>   
> +/* Vendor/implementer registration parameter. */
> +struct arm_cspmu_impl_param {
> +	/* JEDEC assigned implementer id of the vendor. */
> +	u32 impl_id;
> +	/*
> +	 * The pvr value and mask describes the device ids covered by the
> +	 * vendor backend. pvr contains the pattern of acceptable product,
> +	 * variant, and revision bits from device's PMIIDR. pvr_mask contains
> +	 * the relevant bits when comparing pvr. 0 value on the mask means any
> +	 * pvr value is supported.
> +	 */
> +	u32 pvr;
> +	u32 pvr_mask;

Do we need to separate pvr from the vendor_id ? we could simply have:

pmiidr_val; /* includes Vendor id and any other fields of the pmiidr */
pmiidr_mask;

Rest looks fine to me.

Suzuki

Besar Wicaksono April 18, 2023, 7:33 p.m. UTC | #2

Hi Suzuki,

> -----Original Message-----
> From: Suzuki K Poulose <suzuki.poulose@arm.com>
> Sent: Tuesday, April 18, 2023 6:07 AM
> To: Besar Wicaksono <bwicaksono@nvidia.com>; catalin.marinas@arm.com;
> will@kernel.org; mark.rutland@arm.com
> Cc: linux-arm-kernel@lists.infradead.org; linux-kernel@vger.kernel.org; linux-
> tegra@vger.kernel.org; Thierry Reding <treding@nvidia.com>; Jonathan
> Hunter <jonathanh@nvidia.com>; Vikram Sethi <vsethi@nvidia.com>; Richard
> Wiley <rwiley@nvidia.com>; Eric Funsten <efunsten@nvidia.com>
> Subject: Re: [PATCH v2] perf: arm_cspmu: Separate Arm and vendor module
> 
> External email: Use caution opening links or attachments
> 
> 
> On 18/04/2023 07:20, Besar Wicaksono wrote:
> > Arm Coresight PMU driver consists of main standard code and vendor
> > backend code. Both are currently built as a single module.
> > This patch adds vendor registration API to separate the two to
> > keep things modular. Vendor module shall register to the main
> > module on loading and trigger device reprobe.
> >
> > Signed-off-by: Besar Wicaksono <bwicaksono@nvidia.com>
> > ---
> >
> > Changes from v1:
> >   * Added separate Kconfig entry for nvidia backend
> >   * Added lock to protect accesses to the lists
> >   * Added support for matching subset devices from a vendor
> >   * Added state tracking to avoid reprobe when a device is in use
> > v1: ttps://lore.kernel.org/linux-arm-kernel/20230403163905.20354-1-
> bwicaksono@nvidia.com/T/#u
> >
> > ---
> >   drivers/perf/arm_cspmu/Kconfig        |   9 +-
> >   drivers/perf/arm_cspmu/Makefile       |   6 +-
> >   drivers/perf/arm_cspmu/arm_cspmu.c    | 280
> +++++++++++++++++++++++---
> >   drivers/perf/arm_cspmu/arm_cspmu.h    |  32 ++-
> >   drivers/perf/arm_cspmu/nvidia_cspmu.c |  39 +++-
> >   drivers/perf/arm_cspmu/nvidia_cspmu.h |  17 --
> >   6 files changed, 325 insertions(+), 58 deletions(-)
> >   delete mode 100644 drivers/perf/arm_cspmu/nvidia_cspmu.h
> >
> > diff --git a/drivers/perf/arm_cspmu/Kconfig
> b/drivers/perf/arm_cspmu/Kconfig
> > index 0b316fe69a45..8ce7b45a0075 100644
> > --- a/drivers/perf/arm_cspmu/Kconfig
> > +++ b/drivers/perf/arm_cspmu/Kconfig
> > @@ -1,6 +1,6 @@
> >   # SPDX-License-Identifier: GPL-2.0
> >   #
> > -# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights
> reserved.
> > +# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights
> reserved.
> >
> >   config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
> >       tristate "ARM Coresight Architecture PMU"
> > @@ -11,3 +11,10 @@ config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
> >         based on ARM CoreSight PMU architecture. Note that this PMU
> >         architecture does not have relationship with the ARM CoreSight
> >         Self-Hosted Tracing.
> > +
> > +config NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU
> > +     tristate "NVIDIA Coresight Architecture PMU"
> > +     depends on ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
> > +     help
> > +       Provides NVIDIA specific attributes for performance monitoring unit
> > +       (PMU) devices based on ARM CoreSight PMU architecture.
> > diff --git a/drivers/perf/arm_cspmu/Makefile
> b/drivers/perf/arm_cspmu/Makefile
> > index fedb17df982d..f8ae22411d59 100644
> > --- a/drivers/perf/arm_cspmu/Makefile
> > +++ b/drivers/perf/arm_cspmu/Makefile
> > @@ -1,6 +1,6 @@
> > -# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights
> reserved.
> > +# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights
> reserved.
> >   #
> >   # SPDX-License-Identifier: GPL-2.0
> >
> > -obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) +=
> arm_cspmu_module.o
> > -arm_cspmu_module-y := arm_cspmu.o nvidia_cspmu.o
> > +obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) +=
> arm_cspmu.o
> > +obj-$(CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU) +=
> nvidia_cspmu.o
> > diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c
> b/drivers/perf/arm_cspmu/arm_cspmu.c
> > index e31302ab7e37..c55ea2b74454 100644
> > --- a/drivers/perf/arm_cspmu/arm_cspmu.c
> > +++ b/drivers/perf/arm_cspmu/arm_cspmu.c
> > @@ -16,7 +16,7 @@
> >    * The user should refer to the vendor technical documentation to get
> details
> >    * about the supported events.
> >    *
> > - * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights
> reserved.
> > + * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights
> reserved.
> >    *
> >    */
> >
> > @@ -25,13 +25,14 @@
> >   #include <linux/ctype.h>
> >   #include <linux/interrupt.h>
> >   #include <linux/io-64-nonatomic-lo-hi.h>
> > +#include <linux/list.h>
> >   #include <linux/module.h>
> > +#include <linux/mutex.h>
> >   #include <linux/perf_event.h>
> >   #include <linux/platform_device.h>
> >   #include <acpi/processor.h>
> >
> >   #include "arm_cspmu.h"
> > -#include "nvidia_cspmu.h"
> >
> >   #define PMUNAME "arm_cspmu"
> >   #define DRVNAME "arm-cs-arch-pmu"
> > @@ -117,11 +118,52 @@
> >    */
> >   #define HILOHI_MAX_POLL     1000
> >
> > -/* JEDEC-assigned JEP106 identification code */
> > -#define ARM_CSPMU_IMPL_ID_NVIDIA             0x36B
> > -
> >   static unsigned long arm_cspmu_cpuhp_state;
> >
> > +/* List of Coresight PMU instances in the system. */
> > +static LIST_HEAD(arm_cspmus);
> > +
> > +/* List of registered vendor backends. */
> > +static LIST_HEAD(arm_cspmu_impls);
> > +
> > +static DEFINE_MUTEX(arm_cspmu_lock);
> > +
> > +/*
> > + * State of the generic driver.
> > + * 0 => registering backend.
> > + * 1 => ready to use.
> > + * 2 or more => in use.
> > + */
> > +#define ARM_CSPMU_STATE_REG  0
> > +#define ARM_CSPMU_STATE_READY        1
> > +static atomic_t arm_cspmu_state;
> > +
> > +static void arm_cspmu_state_ready(void)
> > +{
> > +     atomic_set(&arm_cspmu_state, ARM_CSPMU_STATE_READY);
> > +}
> > +
> > +static bool try_arm_cspmu_state_reg(void)
> > +{
> > +     const int old = ARM_CSPMU_STATE_READY;
> > +     const int new = ARM_CSPMU_STATE_REG;
> > +
> > +     return atomic_cmpxchg(&arm_cspmu_state, old, new) == old;
> > +}
> > +
> > +static bool try_arm_cspmu_state_get(void)
> > +{
> > +     return atomic_inc_not_zero(&arm_cspmu_state);
> > +}
> > +
> > +static void arm_cspmu_state_put(void)
> > +{
> > +     int ret;
> > +
> > +     ret = atomic_dec_if_positive(&arm_cspmu_state);
> > +     WARN_ON(ret < 0);
> > +}
> > +
> 
> As long as the vendor module is set for the PMU instance, it won't be
> unloaded as long as there are any perf events and thus the specific
> driver cannot be unloaded. So, you don't need explicit refcount
> maintenance for each pmu callbacks.
> 

The arm_cspmu_state mainly protects during new backend registration when
the device is attached to standard implementation. All devices are attached to
standard implementation initially when arm_cspmu module is loaded, since there
is no backend yet. On backend registration, the standard impl is replaced by
backend impl. However, the module unloading mechanism doesn't provide
protection because standard impl is owned by arm_cspmu module, which
is not unloaded during registration.

The refcount may not be required if the devices are not attached to standard
Implementation by default, and the unreg doesn't fallback to it. But that makes
the devices usable only when there is a vendor backend available. 

> >   /*
> >    * In CoreSight PMU architecture, all of the MMIO registers are 32-bit
> except
> >    * counter register. The counter register can be implemented as 32-bit or
> 64-bit
> > @@ -380,26 +422,161 @@ static struct attribute_group
> arm_cspmu_cpumask_attr_group = {
> >   };
> >
> >   struct impl_match {
> > -     u32 pmiidr;
> > -     u32 mask;
> > -     int (*impl_init_ops)(struct arm_cspmu *cspmu);
> > +     struct list_head next;
> > +     struct arm_cspmu_impl_param param;
> >   };
> >
> > -static const struct impl_match impl_match[] = {
> > -     {
> > -       .pmiidr = ARM_CSPMU_IMPL_ID_NVIDIA,
> > -       .mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
> > -       .impl_init_ops = nv_cspmu_init_ops
> > -     },
> > -     {}
> > -};
> > +static struct arm_cspmu_impl_param to_impl_param(const struct
> arm_cspmu *cspmu)
> > +{
> > +     struct arm_cspmu_impl_param ret = {0};
> > +     u32 pmiidr = cspmu->impl.pmiidr;
> > +
> > +     ret.impl_id = FIELD_GET(ARM_CSPMU_PMIIDR_IMPLEMENTER, pmiidr);
> > +     ret.pvr = FIELD_GET(ARM_CSPMU_PMIIDR_PVR, pmiidr);
> > +     ret.pvr_mask = GENMASK(31, 0);
> > +
> > +     return ret;
> > +}
> > +
> > +static bool impl_param_match(const struct arm_cspmu_impl_param *A,
> > +                          const struct arm_cspmu_impl_param *B)
> > +{
> > +     /*
> > +      * Match criteria:
> > +      * - Implementer id should match.
> > +      * - A's device id is within B's range, or vice versa. This allows
> > +      *   vendor to register backend for a range of devices.
> > +      */
> > +     if ((A->impl_id == B->impl_id) &&
> > +         (((A->pvr & A->pvr_mask) == (B->pvr & A->pvr_mask)) ||
> > +          ((A->pvr & B->pvr_mask) == (B->pvr & B->pvr_mask))))
> > +             return true;
> > +
> 
> nit: Please do not use CAPITAL letters for variable names. Could this
> simply accept a pmiidr and a impl_match and match the fields with that
> of the mask/value pair. See more below.
> 

The bitfield comparison in impl_param_match can be reused for different purposes:
1. Test between two mask/value pairs to check if device id ranges from
     both pairs are intersecting. We use it during validation in (un)registration.
2. Along with to_impl_param, we can compare a pmiidr value with a
     mask/value pair to check if a device is compatible with a backend. We use
     it during reprobe and initializing impl_ops.

Got it on the CAPITAL letters, I will change on next revision.

> 
> > +     return false;
> > +}
> > +
> > +static struct impl_match *impl_match_find(
> > +     const struct arm_cspmu_impl_param *impl_param)
> > +{
> > +     struct impl_match *impl_match;
> > +
> > +     list_for_each_entry(impl_match, &arm_cspmu_impls, next) {
> > +             if (impl_param_match(impl_param, &impl_match->param))
> > +                     return impl_match;
> > +     }
> > +
> > +     return NULL;
> > +}
> > +
> > +static int arm_cspmu_impl_reprobe(
> > +     const struct arm_cspmu_impl_param *impl_param)
> > +{
> > +     struct arm_cspmu *cspmu, *temp;
> > +     LIST_HEAD(reprobe_list);
> > +     int ret = 0;
> > +
> > +     mutex_lock(&arm_cspmu_lock);
> > +
> > +     /* Move the matching devices to temp list to avoid recursive lock. */
> > +     list_for_each_entry_safe(cspmu, temp, &arm_cspmus, next) {
> > +             struct arm_cspmu_impl_param match_param =
> to_impl_param(cspmu);
> 
> Also, does this work if the pvr and pvr_mask were provided by the
> backend driver ? to_impl_param() takes the pmiidr which is either
> read from the device or from the ACPI table, unfiltered. Could we
> not change impl_param_match() to :
>                 impl_param_match(cspmu->impl.pmiidr, impl_param) ?
> 

Please see my comment above.

> > +
> > +             if (impl_param_match(impl_param, &match_param))
> > +                     list_move(&cspmu->next, &reprobe_list);
> > +     }
> > +
> > +     mutex_unlock(&arm_cspmu_lock);
> > +
> > +     /* Reprobe the devices. */
> > +     list_for_each_entry_safe(cspmu, temp, &reprobe_list, next) {
> > +             ret = device_reprobe(cspmu->dev);
> > +             if (ret) {
> > +                     pr_err("arm_cspmu fail reprobe err: %d\n", ret);
> > +                     return ret;
> > +             }
> > +     }
> > +
> > +     return 0;
> > +}
> > +
> > +int arm_cspmu_impl_register(const struct arm_cspmu_impl_param
> *impl_param)
> > +{
> > +     struct impl_match *match;
> > +     int ret = 0;
> > +
> > +     if (!try_arm_cspmu_state_reg()) {
> > +             pr_err("arm_cspmu reg failed, device(s) is in use\n");
> > +             return -EBUSY;
> > +     }
> > +
> > +     mutex_lock(&arm_cspmu_lock);
> > +
> > +     match = impl_match_find(impl_param);
> > +     if (match) {
> > +             pr_err("arm_cspmu reg failed, impl: 0x%x, pvr: 0x%x, pvr_mask:
> 0x%x already exists\n",
> > +                     match->param.impl_id, match->param.pvr,
> > +                     match->param.pvr_mask);
> > +             mutex_unlock(&arm_cspmu_lock);
> > +             arm_cspmu_state_ready();
> > +             return -EINVAL;
> > +     }
> > +
> > +     match = kzalloc(sizeof(struct impl_match), GFP_KERNEL);
> > +     if (!match) {
> > +             mutex_unlock(&arm_cspmu_lock);
> > +             arm_cspmu_state_ready();
> > +             return -ENOMEM;
> > +     }
> > +
> > +     memcpy(&match->param, impl_param, sizeof(match->param));
> 
> nit:    match->param = *impl_param; ?

Ah yes!

> 
> > +     list_add(&match->next, &arm_cspmu_impls);
> > +
> > +     mutex_unlock(&arm_cspmu_lock);
> > +
> > +     /* Replace generic backend with vendor implementation. */
> > +     ret = arm_cspmu_impl_reprobe(impl_param);
> > +
> > +     if (ret)
> > +             arm_cspmu_impl_unregister(impl_param);
> > +
> > +     arm_cspmu_state_ready();
> > +
> > +     return ret;
> > +}
> > +EXPORT_SYMBOL_GPL(arm_cspmu_impl_register);
> > +
> > +void arm_cspmu_impl_unregister(const struct arm_cspmu_impl_param
> *impl_param)
> > +{
> > +     struct impl_match *match;
> > +
> > +     mutex_lock(&arm_cspmu_lock);
> > +
> > +     match = impl_match_find(impl_param);
> > +     if (!match) {
> > +             pr_err("arm_cspmu unreg failed, unable to find impl: 0x%x, pvr:
> 0x%x, pvr_mask: 0x%x\n",
> > +                     impl_param->impl_id, impl_param->pvr,
> > +                     impl_param->pvr_mask);
> > +             mutex_unlock(&arm_cspmu_lock);
> > +             return;
> > +     }
> > +
> > +     list_del(&match->next);
> > +     kfree(match);
> > +
> > +     mutex_unlock(&arm_cspmu_lock);
> > +
> > +     /* Re-attach devices to standard driver. */
> > +     arm_cspmu_impl_reprobe(impl_param);
> > +}
> > +EXPORT_SYMBOL_GPL(arm_cspmu_impl_unregister);
> >
> >   static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
> >   {
> > -     int ret;
> > +     int ret = 0;
> >       struct acpi_apmt_node *apmt_node = cspmu->apmt_node;
> >       struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
> > -     const struct impl_match *match = impl_match;
> > +     struct arm_cspmu_impl_param match_param = {0};
> > +     const struct impl_match *match;
> >
> >       /*
> >        * Get PMU implementer and product id from APMT node.
> > @@ -410,19 +587,23 @@ static int arm_cspmu_init_impl_ops(struct
> arm_cspmu *cspmu)
> >               (apmt_node->impl_id) ? apmt_node->impl_id :
> >                                      readl(cspmu->base0 + PMIIDR);
> >
> > -     /* Find implementer specific attribute ops. */
> > -     for (; match->pmiidr; match++) {
> > -             const u32 mask = match->mask;
> > +     cspmu->impl.module = THIS_MODULE;
> >
> > -             if ((match->pmiidr & mask) == (cspmu->impl.pmiidr & mask)) {
> > -                     ret = match->impl_init_ops(cspmu);
> > -                     if (ret)
> > -                             return ret;
> > +     mutex_lock(&arm_cspmu_lock);
> >
> > -                     break;
> > -             }
> > +     /* Find implementer specific attribute ops. */
> > +     match_param = to_impl_param(cspmu);
> > +     match = impl_match_find(&match_param);
> > +     if (match) {
> > +             cspmu->impl.module = match->param.module;
> > +             ret = match->param.impl_init_ops(cspmu);
> >       }
> >
> > +     mutex_unlock(&arm_cspmu_lock);
> > +
> > +     if (ret)
> > +             return ret;
> > +
> >       /* Use default callbacks if implementer doesn't provide one. */
> >       CHECK_DEFAULT_IMPL_OPS(impl_ops, get_event_attrs);
> >       CHECK_DEFAULT_IMPL_OPS(impl_ops, get_format_attrs);
> > @@ -639,6 +820,11 @@ static int arm_cspmu_event_init(struct perf_event
> *event)
> >       struct arm_cspmu *cspmu;
> >       struct hw_perf_event *hwc = &event->hw;
> >
> > +     if (!try_arm_cspmu_state_get()) {
> > +             pr_err("arm_cspmu event_init fail: driver is reprobing\n");
> > +             return -EBUSY;
> > +     }
> > +
> >       cspmu = to_arm_cspmu(event->pmu);
> >
> >       /*
> > @@ -648,12 +834,14 @@ static int arm_cspmu_event_init(struct
> perf_event *event)
> >       if (is_sampling_event(event)) {
> >               dev_dbg(cspmu->pmu.dev,
> >                       "Can't support sampling events\n");
> > +             arm_cspmu_state_put();
> >               return -EOPNOTSUPP;
> >       }
> >
> >       if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK) {
> >               dev_dbg(cspmu->pmu.dev,
> >                       "Can't support per-task counters\n");
> > +             arm_cspmu_state_put();
> >               return -EINVAL;
> >       }
> >
> > @@ -664,16 +852,21 @@ static int arm_cspmu_event_init(struct
> perf_event *event)
> >       if (!cpumask_test_cpu(event->cpu, &cspmu->associated_cpus)) {
> >               dev_dbg(cspmu->pmu.dev,
> >                       "Requested cpu is not associated with the PMU\n");
> > +             arm_cspmu_state_put();
> >               return -EINVAL;
> >       }
> >
> >       /* Enforce the current active CPU to handle the events in this PMU. */
> >       event->cpu = cpumask_first(&cspmu->active_cpu);
> > -     if (event->cpu >= nr_cpu_ids)
> > +     if (event->cpu >= nr_cpu_ids) {
> > +             arm_cspmu_state_put();
> >               return -EINVAL;
> > +     }
> >
> > -     if (!arm_cspmu_validate_group(event))
> > +     if (!arm_cspmu_validate_group(event)) {
> > +             arm_cspmu_state_put();
> >               return -EINVAL;
> > +     }
> >
> >       /*
> >        * The logical counter id is tracked with hw_perf_event.extra_reg.idx.
> > @@ -686,6 +879,8 @@ static int arm_cspmu_event_init(struct perf_event
> *event)
> >       hwc->extra_reg.idx = -1;
> >       hwc->config = cspmu->impl.ops.event_type(event);
> >
> > +     arm_cspmu_state_put();
> > +
> >       return 0;
> >   }
> >
> > @@ -864,13 +1059,22 @@ static int arm_cspmu_add(struct perf_event
> *event, int flags)
> >       struct hw_perf_event *hwc = &event->hw;
> >       int idx;
> >
> > +     if (!try_arm_cspmu_state_get()) {
> > +             pr_err("arm_cspmu event_init fail: driver is reprobing\n");
> > +             return -EBUSY;
> > +     }
> > +
> >       if (WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(),
> > -                                        &cspmu->associated_cpus)))
> > +                                        &cspmu->associated_cpus))) {
> > +             arm_cspmu_state_put();
> >               return -ENOENT;
> > +     }
> >
> >       idx = arm_cspmu_get_event_idx(hw_events, event);
> > -     if (idx < 0)
> > +     if (idx < 0) {
> > +             arm_cspmu_state_put();
> >               return idx;
> > +     }
> >
> >       hw_events->events[idx] = event;
> >       hwc->idx = to_phys_idx(cspmu, idx);
> > @@ -900,6 +1104,8 @@ static void arm_cspmu_del(struct perf_event
> *event, int flags)
> >       clear_bit(idx, hw_events->used_ctrs);
> >
> >       perf_event_update_userpage(event);
> > +
> > +     arm_cspmu_state_put();
> >   }
> >
> >   static void arm_cspmu_read(struct perf_event *event)
> > @@ -1154,7 +1360,7 @@ static int arm_cspmu_register_pmu(struct
> arm_cspmu *cspmu)
> >
> >       cspmu->pmu = (struct pmu){
> >               .task_ctx_nr    = perf_invalid_context,
> > -             .module         = THIS_MODULE,
> > +             .module         = cspmu->impl.module,
> >               .pmu_enable     = arm_cspmu_enable,
> >               .pmu_disable    = arm_cspmu_disable,
> >               .event_init     = arm_cspmu_event_init,
> > @@ -1205,6 +1411,10 @@ static int arm_cspmu_device_probe(struct
> platform_device *pdev)
> >       if (ret)
> >               return ret;
> >
> > +     mutex_lock(&arm_cspmu_lock);
> > +     list_add(&cspmu->next, &arm_cspmus);
> > +     mutex_unlock(&arm_cspmu_lock);
> > +
> >       return 0;
> >   }
> >
> > @@ -1212,6 +1422,10 @@ static int arm_cspmu_device_remove(struct
> platform_device *pdev)
> >   {
> >       struct arm_cspmu *cspmu = platform_get_drvdata(pdev);
> >
> > +     mutex_lock(&arm_cspmu_lock);
> > +     list_del(&cspmu->next);
> > +     mutex_unlock(&arm_cspmu_lock);
> > +
> >       perf_pmu_unregister(&cspmu->pmu);
> >       cpuhp_state_remove_instance(arm_cspmu_cpuhp_state, &cspmu-
> >cpuhp_node);
> >
> > @@ -1281,6 +1495,8 @@ static int __init arm_cspmu_init(void)
> >   {
> >       int ret;
> >
> > +     arm_cspmu_state_ready();
> > +
> >       ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
> >                                       "perf/arm/cspmu:online",
> >                                       arm_cspmu_cpu_online,
> > diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h
> b/drivers/perf/arm_cspmu/arm_cspmu.h
> > index 51323b175a4a..cf3458d9fc63 100644
> > --- a/drivers/perf/arm_cspmu/arm_cspmu.h
> > +++ b/drivers/perf/arm_cspmu/arm_cspmu.h
> > @@ -1,7 +1,7 @@
> >   /* SPDX-License-Identifier: GPL-2.0
> >    *
> >    * ARM CoreSight Architecture PMU driver.
> > - * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights
> reserved.
> > + * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights
> reserved.
> >    *
> >    */
> >
> > @@ -68,7 +68,10 @@
> >
> >   /* PMIIDR register field */
> >   #define ARM_CSPMU_PMIIDR_IMPLEMENTER        GENMASK(11, 0)
> > +#define ARM_CSPMU_PMIIDR_REVISION    GENMASK(15, 12)
> > +#define ARM_CSPMU_PMIIDR_VARIANT     GENMASK(19, 16)
> >   #define ARM_CSPMU_PMIIDR_PRODUCTID  GENMASK(31, 20)
> > +#define ARM_CSPMU_PMIIDR_PVR         GENMASK(31, 12)
> >
> >   struct arm_cspmu;
> >
> > @@ -107,15 +110,36 @@ struct arm_cspmu_impl_ops {
> >                                        struct attribute *attr, int unused);
> >   };
> >
> > +/* Vendor/implementer registration parameter. */
> > +struct arm_cspmu_impl_param {
> > +     /* JEDEC assigned implementer id of the vendor. */
> > +     u32 impl_id;
> > +     /*
> > +      * The pvr value and mask describes the device ids covered by the
> > +      * vendor backend. pvr contains the pattern of acceptable product,
> > +      * variant, and revision bits from device's PMIIDR. pvr_mask contains
> > +      * the relevant bits when comparing pvr. 0 value on the mask means any
> > +      * pvr value is supported.
> > +      */
> > +     u32 pvr;
> > +     u32 pvr_mask;
> 
> Do we need to separate pvr from the vendor_id ? we could simply have:
> 
> pmiidr_val; /* includes Vendor id and any other fields of the pmiidr */
> pmiidr_mask;

Ok, im fine with combining them.

Thanks,
Besar

> 
> Rest looks fine to me.
> 
> Suzuki
>

Suzuki K Poulose April 19, 2023, 9:31 p.m. UTC | #3

On 18/04/2023 20:33, Besar Wicaksono wrote:
> Hi Suzuki,
> 
>> -----Original Message-----
>> From: Suzuki K Poulose <suzuki.poulose@arm.com>
>> Sent: Tuesday, April 18, 2023 6:07 AM
>> To: Besar Wicaksono <bwicaksono@nvidia.com>; catalin.marinas@arm.com;
>> will@kernel.org; mark.rutland@arm.com
>> Cc: linux-arm-kernel@lists.infradead.org; linux-kernel@vger.kernel.org; linux-
>> tegra@vger.kernel.org; Thierry Reding <treding@nvidia.com>; Jonathan
>> Hunter <jonathanh@nvidia.com>; Vikram Sethi <vsethi@nvidia.com>; Richard
>> Wiley <rwiley@nvidia.com>; Eric Funsten <efunsten@nvidia.com>
>> Subject: Re: [PATCH v2] perf: arm_cspmu: Separate Arm and vendor module
>>
>> External email: Use caution opening links or attachments
>>
>>
>> On 18/04/2023 07:20, Besar Wicaksono wrote:
>>> Arm Coresight PMU driver consists of main standard code and vendor
>>> backend code. Both are currently built as a single module.
>>> This patch adds vendor registration API to separate the two to
>>> keep things modular. Vendor module shall register to the main
>>> module on loading and trigger device reprobe.
>>>
>>> Signed-off-by: Besar Wicaksono <bwicaksono@nvidia.com>
>>> ---
>>>
>>> Changes from v1:
>>>    * Added separate Kconfig entry for nvidia backend
>>>    * Added lock to protect accesses to the lists
>>>    * Added support for matching subset devices from a vendor
>>>    * Added state tracking to avoid reprobe when a device is in use
>>> v1: ttps://lore.kernel.org/linux-arm-kernel/20230403163905.20354-1-
>> bwicaksono@nvidia.com/T/#u
>>>
>>> ---
>>>    drivers/perf/arm_cspmu/Kconfig        |   9 +-
>>>    drivers/perf/arm_cspmu/Makefile       |   6 +-
>>>    drivers/perf/arm_cspmu/arm_cspmu.c    | 280
>> +++++++++++++++++++++++---
>>>    drivers/perf/arm_cspmu/arm_cspmu.h    |  32 ++-
>>>    drivers/perf/arm_cspmu/nvidia_cspmu.c |  39 +++-
>>>    drivers/perf/arm_cspmu/nvidia_cspmu.h |  17 --
>>>    6 files changed, 325 insertions(+), 58 deletions(-)
>>>    delete mode 100644 drivers/perf/arm_cspmu/nvidia_cspmu.h
>>>
>>> diff --git a/drivers/perf/arm_cspmu/Kconfig
>> b/drivers/perf/arm_cspmu/Kconfig
>>> index 0b316fe69a45..8ce7b45a0075 100644
>>> --- a/drivers/perf/arm_cspmu/Kconfig
>>> +++ b/drivers/perf/arm_cspmu/Kconfig
>>> @@ -1,6 +1,6 @@
>>>    # SPDX-License-Identifier: GPL-2.0
>>>    #
>>> -# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights
>> reserved.
>>> +# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights
>> reserved.
>>>
>>>    config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
>>>        tristate "ARM Coresight Architecture PMU"
>>> @@ -11,3 +11,10 @@ config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
>>>          based on ARM CoreSight PMU architecture. Note that this PMU
>>>          architecture does not have relationship with the ARM CoreSight
>>>          Self-Hosted Tracing.
>>> +
>>> +config NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU
>>> +     tristate "NVIDIA Coresight Architecture PMU"
>>> +     depends on ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
>>> +     help
>>> +       Provides NVIDIA specific attributes for performance monitoring unit
>>> +       (PMU) devices based on ARM CoreSight PMU architecture.
>>> diff --git a/drivers/perf/arm_cspmu/Makefile
>> b/drivers/perf/arm_cspmu/Makefile
>>> index fedb17df982d..f8ae22411d59 100644
>>> --- a/drivers/perf/arm_cspmu/Makefile
>>> +++ b/drivers/perf/arm_cspmu/Makefile
>>> @@ -1,6 +1,6 @@
>>> -# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights
>> reserved.
>>> +# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights
>> reserved.
>>>    #
>>>    # SPDX-License-Identifier: GPL-2.0
>>>
>>> -obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) +=
>> arm_cspmu_module.o
>>> -arm_cspmu_module-y := arm_cspmu.o nvidia_cspmu.o
>>> +obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) +=
>> arm_cspmu.o
>>> +obj-$(CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU) +=
>> nvidia_cspmu.o
>>> diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c
>> b/drivers/perf/arm_cspmu/arm_cspmu.c
>>> index e31302ab7e37..c55ea2b74454 100644
>>> --- a/drivers/perf/arm_cspmu/arm_cspmu.c
>>> +++ b/drivers/perf/arm_cspmu/arm_cspmu.c
>>> @@ -16,7 +16,7 @@
>>>     * The user should refer to the vendor technical documentation to get
>> details
>>>     * about the supported events.
>>>     *
>>> - * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights
>> reserved.
>>> + * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights
>> reserved.
>>>     *
>>>     */
>>>
>>> @@ -25,13 +25,14 @@
>>>    #include <linux/ctype.h>
>>>    #include <linux/interrupt.h>
>>>    #include <linux/io-64-nonatomic-lo-hi.h>
>>> +#include <linux/list.h>
>>>    #include <linux/module.h>
>>> +#include <linux/mutex.h>
>>>    #include <linux/perf_event.h>
>>>    #include <linux/platform_device.h>
>>>    #include <acpi/processor.h>
>>>
>>>    #include "arm_cspmu.h"
>>> -#include "nvidia_cspmu.h"
>>>
>>>    #define PMUNAME "arm_cspmu"
>>>    #define DRVNAME "arm-cs-arch-pmu"
>>> @@ -117,11 +118,52 @@
>>>     */
>>>    #define HILOHI_MAX_POLL     1000
>>>
>>> -/* JEDEC-assigned JEP106 identification code */
>>> -#define ARM_CSPMU_IMPL_ID_NVIDIA             0x36B
>>> -
>>>    static unsigned long arm_cspmu_cpuhp_state;
>>>
>>> +/* List of Coresight PMU instances in the system. */
>>> +static LIST_HEAD(arm_cspmus);
>>> +
>>> +/* List of registered vendor backends. */
>>> +static LIST_HEAD(arm_cspmu_impls);
>>> +
>>> +static DEFINE_MUTEX(arm_cspmu_lock);
>>> +
>>> +/*
>>> + * State of the generic driver.
>>> + * 0 => registering backend.
>>> + * 1 => ready to use.
>>> + * 2 or more => in use.
>>> + */
>>> +#define ARM_CSPMU_STATE_REG  0
>>> +#define ARM_CSPMU_STATE_READY        1
>>> +static atomic_t arm_cspmu_state;
>>> +
>>> +static void arm_cspmu_state_ready(void)
>>> +{
>>> +     atomic_set(&arm_cspmu_state, ARM_CSPMU_STATE_READY);
>>> +}
>>> +
>>> +static bool try_arm_cspmu_state_reg(void)
>>> +{
>>> +     const int old = ARM_CSPMU_STATE_READY;
>>> +     const int new = ARM_CSPMU_STATE_REG;
>>> +
>>> +     return atomic_cmpxchg(&arm_cspmu_state, old, new) == old;
>>> +}
>>> +
>>> +static bool try_arm_cspmu_state_get(void)
>>> +{
>>> +     return atomic_inc_not_zero(&arm_cspmu_state);
>>> +}
>>> +
>>> +static void arm_cspmu_state_put(void)
>>> +{
>>> +     int ret;
>>> +
>>> +     ret = atomic_dec_if_positive(&arm_cspmu_state);
>>> +     WARN_ON(ret < 0);
>>> +}
>>> +
>>
>> As long as the vendor module is set for the PMU instance, it won't be
>> unloaded as long as there are any perf events and thus the specific
>> driver cannot be unloaded. So, you don't need explicit refcount
>> maintenance for each pmu callbacks.
>>
> 
> The arm_cspmu_state mainly protects during new backend registration when
> the device is attached to standard implementation. All devices are attached to
> standard implementation initially when arm_cspmu module is loaded, since there
> is no backend yet. On backend registration, the standard impl is replaced by
> backend impl. However, the module unloading mechanism doesn't provide
> protection because standard impl is owned by arm_cspmu module, which
> is not unloaded during registration.
> 
> The refcount may not be required if the devices are not attached to standard
> Implementation by default, and the unreg doesn't fallback to it. But that makes
> the devices usable only when there is a vendor backend available.

Ok, thanks for the explanation. But I still think we :

  - Don't need a single global refcount for all the PMUs. Instead this
    could be per PMU instance (arm_cspmu), only when it is backed by
    "generic" backend, others get module refcount. If the refcount of
    "generic" PMU is positive, during the registration of a matching
    backend driver, we could simply mark that as pending reprobe.

  - And only do the refcount for the following call backs:

   pmu:: event_init -> hold the refcount
   pmu:: destroy -> drop the refcount and trigger a reprobe if one was
         pending (see above)

This would allow loading (unrelated) modules even when other PMUs are
active.

>>> +static bool impl_param_match(const struct arm_cspmu_impl_param *A,
>>> +                          const struct arm_cspmu_impl_param *B)
>>> +{
>>> +     /*
>>> +      * Match criteria:
>>> +      * - Implementer id should match.
>>> +      * - A's device id is within B's range, or vice versa. This allows
>>> +      *   vendor to register backend for a range of devices.
>>> +      */
>>> +     if ((A->impl_id == B->impl_id) &&
>>> +         (((A->pvr & A->pvr_mask) == (B->pvr & A->pvr_mask)) ||
>>> +          ((A->pvr & B->pvr_mask) == (B->pvr & B->pvr_mask))))
>>> +             return true;
>>> +
>>
>> nit: Please do not use CAPITAL letters for variable names. Could this
>> simply accept a pmiidr and a impl_match and match the fields with that
>> of the mask/value pair. See more below.
>>
> 
> The bitfield comparison in impl_param_match can be reused for different purposes:
> 1. Test between two mask/value pairs to check if device id ranges from
>       both pairs are intersecting. We use it during validation in (un)registration.
> 2. Along with to_impl_param, we can compare a pmiidr value with a
>       mask/value pair to check if a device is compatible with a backend. We use
>       it during reprobe and initializing impl_ops.

Ok.

Suzuki

Besar Wicaksono April 20, 2023, 12:22 a.m. UTC | #4

Hi Suzuki,

> -----Original Message-----
> From: Suzuki K Poulose <suzuki.poulose@arm.com>
> Sent: Wednesday, April 19, 2023 4:32 PM
> To: Besar Wicaksono <bwicaksono@nvidia.com>; catalin.marinas@arm.com;
> will@kernel.org; mark.rutland@arm.com
> Cc: linux-arm-kernel@lists.infradead.org; linux-kernel@vger.kernel.org; linux-
> tegra@vger.kernel.org; Thierry Reding <treding@nvidia.com>; Jonathan
> Hunter <jonathanh@nvidia.com>; Vikram Sethi <vsethi@nvidia.com>; Richard
> Wiley <rwiley@nvidia.com>; Eric Funsten <efunsten@nvidia.com>
> Subject: Re: [PATCH v2] perf: arm_cspmu: Separate Arm and vendor module
> 
> External email: Use caution opening links or attachments
> 
> 
> On 18/04/2023 20:33, Besar Wicaksono wrote:
> > Hi Suzuki,
> >
> >> -----Original Message-----
> >> From: Suzuki K Poulose <suzuki.poulose@arm.com>
> >> Sent: Tuesday, April 18, 2023 6:07 AM
> >> To: Besar Wicaksono <bwicaksono@nvidia.com>;
> catalin.marinas@arm.com;
> >> will@kernel.org; mark.rutland@arm.com
> >> Cc: linux-arm-kernel@lists.infradead.org; linux-kernel@vger.kernel.org;
> linux-
> >> tegra@vger.kernel.org; Thierry Reding <treding@nvidia.com>; Jonathan
> >> Hunter <jonathanh@nvidia.com>; Vikram Sethi <vsethi@nvidia.com>;
> Richard
> >> Wiley <rwiley@nvidia.com>; Eric Funsten <efunsten@nvidia.com>
> >> Subject: Re: [PATCH v2] perf: arm_cspmu: Separate Arm and vendor
> module
> >>
> >> External email: Use caution opening links or attachments
> >>
> >>
> >> On 18/04/2023 07:20, Besar Wicaksono wrote:
> >>> Arm Coresight PMU driver consists of main standard code and vendor
> >>> backend code. Both are currently built as a single module.
> >>> This patch adds vendor registration API to separate the two to
> >>> keep things modular. Vendor module shall register to the main
> >>> module on loading and trigger device reprobe.
> >>>
> >>> Signed-off-by: Besar Wicaksono <bwicaksono@nvidia.com>
> >>> ---
> >>>
> >>> Changes from v1:
> >>>    * Added separate Kconfig entry for nvidia backend
> >>>    * Added lock to protect accesses to the lists
> >>>    * Added support for matching subset devices from a vendor
> >>>    * Added state tracking to avoid reprobe when a device is in use
> >>> v1: ttps://lore.kernel.org/linux-arm-kernel/20230403163905.20354-1-
> >> bwicaksono@nvidia.com/T/#u
> >>>
> >>> ---
> >>>    drivers/perf/arm_cspmu/Kconfig        |   9 +-
> >>>    drivers/perf/arm_cspmu/Makefile       |   6 +-
> >>>    drivers/perf/arm_cspmu/arm_cspmu.c    | 280
> >> +++++++++++++++++++++++---
> >>>    drivers/perf/arm_cspmu/arm_cspmu.h    |  32 ++-
> >>>    drivers/perf/arm_cspmu/nvidia_cspmu.c |  39 +++-
> >>>    drivers/perf/arm_cspmu/nvidia_cspmu.h |  17 --
> >>>    6 files changed, 325 insertions(+), 58 deletions(-)
> >>>    delete mode 100644 drivers/perf/arm_cspmu/nvidia_cspmu.h
> >>>
> >>> diff --git a/drivers/perf/arm_cspmu/Kconfig
> >> b/drivers/perf/arm_cspmu/Kconfig
> >>> index 0b316fe69a45..8ce7b45a0075 100644
> >>> --- a/drivers/perf/arm_cspmu/Kconfig
> >>> +++ b/drivers/perf/arm_cspmu/Kconfig
> >>> @@ -1,6 +1,6 @@
> >>>    # SPDX-License-Identifier: GPL-2.0
> >>>    #
> >>> -# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights
> >> reserved.
> >>> +# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All
> rights
> >> reserved.
> >>>
> >>>    config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
> >>>        tristate "ARM Coresight Architecture PMU"
> >>> @@ -11,3 +11,10 @@ config
> ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
> >>>          based on ARM CoreSight PMU architecture. Note that this PMU
> >>>          architecture does not have relationship with the ARM CoreSight
> >>>          Self-Hosted Tracing.
> >>> +
> >>> +config NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU
> >>> +     tristate "NVIDIA Coresight Architecture PMU"
> >>> +     depends on ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
> >>> +     help
> >>> +       Provides NVIDIA specific attributes for performance monitoring unit
> >>> +       (PMU) devices based on ARM CoreSight PMU architecture.
> >>> diff --git a/drivers/perf/arm_cspmu/Makefile
> >> b/drivers/perf/arm_cspmu/Makefile
> >>> index fedb17df982d..f8ae22411d59 100644
> >>> --- a/drivers/perf/arm_cspmu/Makefile
> >>> +++ b/drivers/perf/arm_cspmu/Makefile
> >>> @@ -1,6 +1,6 @@
> >>> -# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights
> >> reserved.
> >>> +# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All
> rights
> >> reserved.
> >>>    #
> >>>    # SPDX-License-Identifier: GPL-2.0
> >>>
> >>> -obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) +=
> >> arm_cspmu_module.o
> >>> -arm_cspmu_module-y := arm_cspmu.o nvidia_cspmu.o
> >>> +obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) +=
> >> arm_cspmu.o
> >>> +obj-$(CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU) +=
> >> nvidia_cspmu.o
> >>> diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c
> >> b/drivers/perf/arm_cspmu/arm_cspmu.c
> >>> index e31302ab7e37..c55ea2b74454 100644
> >>> --- a/drivers/perf/arm_cspmu/arm_cspmu.c
> >>> +++ b/drivers/perf/arm_cspmu/arm_cspmu.c
> >>> @@ -16,7 +16,7 @@
> >>>     * The user should refer to the vendor technical documentation to get
> >> details
> >>>     * about the supported events.
> >>>     *
> >>> - * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights
> >> reserved.
> >>> + * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All
> rights
> >> reserved.
> >>>     *
> >>>     */
> >>>
> >>> @@ -25,13 +25,14 @@
> >>>    #include <linux/ctype.h>
> >>>    #include <linux/interrupt.h>
> >>>    #include <linux/io-64-nonatomic-lo-hi.h>
> >>> +#include <linux/list.h>
> >>>    #include <linux/module.h>
> >>> +#include <linux/mutex.h>
> >>>    #include <linux/perf_event.h>
> >>>    #include <linux/platform_device.h>
> >>>    #include <acpi/processor.h>
> >>>
> >>>    #include "arm_cspmu.h"
> >>> -#include "nvidia_cspmu.h"
> >>>
> >>>    #define PMUNAME "arm_cspmu"
> >>>    #define DRVNAME "arm-cs-arch-pmu"
> >>> @@ -117,11 +118,52 @@
> >>>     */
> >>>    #define HILOHI_MAX_POLL     1000
> >>>
> >>> -/* JEDEC-assigned JEP106 identification code */
> >>> -#define ARM_CSPMU_IMPL_ID_NVIDIA             0x36B
> >>> -
> >>>    static unsigned long arm_cspmu_cpuhp_state;
> >>>
> >>> +/* List of Coresight PMU instances in the system. */
> >>> +static LIST_HEAD(arm_cspmus);
> >>> +
> >>> +/* List of registered vendor backends. */
> >>> +static LIST_HEAD(arm_cspmu_impls);
> >>> +
> >>> +static DEFINE_MUTEX(arm_cspmu_lock);
> >>> +
> >>> +/*
> >>> + * State of the generic driver.
> >>> + * 0 => registering backend.
> >>> + * 1 => ready to use.
> >>> + * 2 or more => in use.
> >>> + */
> >>> +#define ARM_CSPMU_STATE_REG  0
> >>> +#define ARM_CSPMU_STATE_READY        1
> >>> +static atomic_t arm_cspmu_state;
> >>> +
> >>> +static void arm_cspmu_state_ready(void)
> >>> +{
> >>> +     atomic_set(&arm_cspmu_state, ARM_CSPMU_STATE_READY);
> >>> +}
> >>> +
> >>> +static bool try_arm_cspmu_state_reg(void)
> >>> +{
> >>> +     const int old = ARM_CSPMU_STATE_READY;
> >>> +     const int new = ARM_CSPMU_STATE_REG;
> >>> +
> >>> +     return atomic_cmpxchg(&arm_cspmu_state, old, new) == old;
> >>> +}
> >>> +
> >>> +static bool try_arm_cspmu_state_get(void)
> >>> +{
> >>> +     return atomic_inc_not_zero(&arm_cspmu_state);
> >>> +}
> >>> +
> >>> +static void arm_cspmu_state_put(void)
> >>> +{
> >>> +     int ret;
> >>> +
> >>> +     ret = atomic_dec_if_positive(&arm_cspmu_state);
> >>> +     WARN_ON(ret < 0);
> >>> +}
> >>> +
> >>
> >> As long as the vendor module is set for the PMU instance, it won't be
> >> unloaded as long as there are any perf events and thus the specific
> >> driver cannot be unloaded. So, you don't need explicit refcount
> >> maintenance for each pmu callbacks.
> >>
> >
> > The arm_cspmu_state mainly protects during new backend registration
> when
> > the device is attached to standard implementation. All devices are attached
> to
> > standard implementation initially when arm_cspmu module is loaded, since
> there
> > is no backend yet. On backend registration, the standard impl is replaced by
> > backend impl. However, the module unloading mechanism doesn't provide
> > protection because standard impl is owned by arm_cspmu module, which
> > is not unloaded during registration.
> >
> > The refcount may not be required if the devices are not attached to standard
> > Implementation by default, and the unreg doesn't fallback to it. But that
> makes
> > the devices usable only when there is a vendor backend available.
> 
> Ok, thanks for the explanation. But I still think we :
> 
>   - Don't need a single global refcount for all the PMUs. Instead this
>     could be per PMU instance (arm_cspmu), only when it is backed by

Ok, we can add refcount per PMU.

>     "generic" backend, others get module refcount. If the refcount of
>     "generic" PMU is positive, during the registration of a matching
>     backend driver, we could simply mark that as pending reprobe.
> 
>   - And only do the refcount for the following call backs:
> 
>    pmu:: event_init -> hold the refcount
>    pmu:: destroy -> drop the refcount and trigger a reprobe if one was
>          pending (see above)

Is it safe to reprobe on destroy ? The reprobe will free and reallocate
the memory managed by the device.

Thanks,
Besar

Besar Wicaksono April 27, 2023, 12:44 a.m. UTC | #5

Hi Suzuki,

> -----Original Message-----
> From: Besar Wicaksono <bwicaksono@nvidia.com>
> Sent: Wednesday, April 19, 2023 7:22 PM
> To: Suzuki K Poulose <suzuki.poulose@arm.com>; catalin.marinas@arm.com;
> will@kernel.org; mark.rutland@arm.com
> Cc: linux-arm-kernel@lists.infradead.org; linux-kernel@vger.kernel.org; linux-
> tegra@vger.kernel.org; Thierry Reding <treding@nvidia.com>; Jonathan
> Hunter <jonathanh@nvidia.com>; Vikram Sethi <vsethi@nvidia.com>; Richard
> Wiley <rwiley@nvidia.com>; Eric Funsten <efunsten@nvidia.com>
> Subject: RE: [PATCH v2] perf: arm_cspmu: Separate Arm and vendor module
> 
> Hi Suzuki,
> 
> > -----Original Message-----
> > From: Suzuki K Poulose <suzuki.poulose@arm.com>
> > Sent: Wednesday, April 19, 2023 4:32 PM
> > To: Besar Wicaksono <bwicaksono@nvidia.com>; catalin.marinas@arm.com;
> > will@kernel.org; mark.rutland@arm.com
> > Cc: linux-arm-kernel@lists.infradead.org; linux-kernel@vger.kernel.org; linux-
> > tegra@vger.kernel.org; Thierry Reding <treding@nvidia.com>; Jonathan
> > Hunter <jonathanh@nvidia.com>; Vikram Sethi <vsethi@nvidia.com>;
> Richard
> > Wiley <rwiley@nvidia.com>; Eric Funsten <efunsten@nvidia.com>
> > Subject: Re: [PATCH v2] perf: arm_cspmu: Separate Arm and vendor module
> >
> > External email: Use caution opening links or attachments
> >
> >
> > On 18/04/2023 20:33, Besar Wicaksono wrote:
> > > Hi Suzuki,
> > >
> > >> -----Original Message-----
> > >> From: Suzuki K Poulose <suzuki.poulose@arm.com>
> > >> Sent: Tuesday, April 18, 2023 6:07 AM
> > >> To: Besar Wicaksono <bwicaksono@nvidia.com>;
> > catalin.marinas@arm.com;
> > >> will@kernel.org; mark.rutland@arm.com
> > >> Cc: linux-arm-kernel@lists.infradead.org; linux-kernel@vger.kernel.org;
> > linux-
> > >> tegra@vger.kernel.org; Thierry Reding <treding@nvidia.com>; Jonathan
> > >> Hunter <jonathanh@nvidia.com>; Vikram Sethi <vsethi@nvidia.com>;
> > Richard
> > >> Wiley <rwiley@nvidia.com>; Eric Funsten <efunsten@nvidia.com>
> > >> Subject: Re: [PATCH v2] perf: arm_cspmu: Separate Arm and vendor
> > module
> > >>
> > >> External email: Use caution opening links or attachments
> > >>
> > >>
> > >> On 18/04/2023 07:20, Besar Wicaksono wrote:
> > >>> Arm Coresight PMU driver consists of main standard code and vendor
> > >>> backend code. Both are currently built as a single module.
> > >>> This patch adds vendor registration API to separate the two to
> > >>> keep things modular. Vendor module shall register to the main
> > >>> module on loading and trigger device reprobe.
> > >>>
> > >>> Signed-off-by: Besar Wicaksono <bwicaksono@nvidia.com>
> > >>> ---
> > >>>
> > >>> Changes from v1:
> > >>>    * Added separate Kconfig entry for nvidia backend
> > >>>    * Added lock to protect accesses to the lists
> > >>>    * Added support for matching subset devices from a vendor
> > >>>    * Added state tracking to avoid reprobe when a device is in use
> > >>> v1: ttps://lore.kernel.org/linux-arm-kernel/20230403163905.20354-1-
> > >> bwicaksono@nvidia.com/T/#u
> > >>>
> > >>> ---
> > >>>    drivers/perf/arm_cspmu/Kconfig        |   9 +-
> > >>>    drivers/perf/arm_cspmu/Makefile       |   6 +-
> > >>>    drivers/perf/arm_cspmu/arm_cspmu.c    | 280
> > >> +++++++++++++++++++++++---
> > >>>    drivers/perf/arm_cspmu/arm_cspmu.h    |  32 ++-
> > >>>    drivers/perf/arm_cspmu/nvidia_cspmu.c |  39 +++-
> > >>>    drivers/perf/arm_cspmu/nvidia_cspmu.h |  17 --
> > >>>    6 files changed, 325 insertions(+), 58 deletions(-)
> > >>>    delete mode 100644 drivers/perf/arm_cspmu/nvidia_cspmu.h
> > >>>
> > >>> diff --git a/drivers/perf/arm_cspmu/Kconfig
> > >> b/drivers/perf/arm_cspmu/Kconfig
> > >>> index 0b316fe69a45..8ce7b45a0075 100644
> > >>> --- a/drivers/perf/arm_cspmu/Kconfig
> > >>> +++ b/drivers/perf/arm_cspmu/Kconfig
> > >>> @@ -1,6 +1,6 @@
> > >>>    # SPDX-License-Identifier: GPL-2.0
> > >>>    #
> > >>> -# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights
> > >> reserved.
> > >>> +# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All
> > rights
> > >> reserved.
> > >>>
> > >>>    config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
> > >>>        tristate "ARM Coresight Architecture PMU"
> > >>> @@ -11,3 +11,10 @@ config
> > ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
> > >>>          based on ARM CoreSight PMU architecture. Note that this PMU
> > >>>          architecture does not have relationship with the ARM CoreSight
> > >>>          Self-Hosted Tracing.
> > >>> +
> > >>> +config NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU
> > >>> +     tristate "NVIDIA Coresight Architecture PMU"
> > >>> +     depends on ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
> > >>> +     help
> > >>> +       Provides NVIDIA specific attributes for performance monitoring unit
> > >>> +       (PMU) devices based on ARM CoreSight PMU architecture.
> > >>> diff --git a/drivers/perf/arm_cspmu/Makefile
> > >> b/drivers/perf/arm_cspmu/Makefile
> > >>> index fedb17df982d..f8ae22411d59 100644
> > >>> --- a/drivers/perf/arm_cspmu/Makefile
> > >>> +++ b/drivers/perf/arm_cspmu/Makefile
> > >>> @@ -1,6 +1,6 @@
> > >>> -# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights
> > >> reserved.
> > >>> +# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All
> > rights
> > >> reserved.
> > >>>    #
> > >>>    # SPDX-License-Identifier: GPL-2.0
> > >>>
> > >>> -obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) +=
> > >> arm_cspmu_module.o
> > >>> -arm_cspmu_module-y := arm_cspmu.o nvidia_cspmu.o
> > >>> +obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) +=
> > >> arm_cspmu.o
> > >>> +obj-$(CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU) +=
> > >> nvidia_cspmu.o
> > >>> diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c
> > >> b/drivers/perf/arm_cspmu/arm_cspmu.c
> > >>> index e31302ab7e37..c55ea2b74454 100644
> > >>> --- a/drivers/perf/arm_cspmu/arm_cspmu.c
> > >>> +++ b/drivers/perf/arm_cspmu/arm_cspmu.c
> > >>> @@ -16,7 +16,7 @@
> > >>>     * The user should refer to the vendor technical documentation to get
> > >> details
> > >>>     * about the supported events.
> > >>>     *
> > >>> - * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights
> > >> reserved.
> > >>> + * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All
> > rights
> > >> reserved.
> > >>>     *
> > >>>     */
> > >>>
> > >>> @@ -25,13 +25,14 @@
> > >>>    #include <linux/ctype.h>
> > >>>    #include <linux/interrupt.h>
> > >>>    #include <linux/io-64-nonatomic-lo-hi.h>
> > >>> +#include <linux/list.h>
> > >>>    #include <linux/module.h>
> > >>> +#include <linux/mutex.h>
> > >>>    #include <linux/perf_event.h>
> > >>>    #include <linux/platform_device.h>
> > >>>    #include <acpi/processor.h>
> > >>>
> > >>>    #include "arm_cspmu.h"
> > >>> -#include "nvidia_cspmu.h"
> > >>>
> > >>>    #define PMUNAME "arm_cspmu"
> > >>>    #define DRVNAME "arm-cs-arch-pmu"
> > >>> @@ -117,11 +118,52 @@
> > >>>     */
> > >>>    #define HILOHI_MAX_POLL     1000
> > >>>
> > >>> -/* JEDEC-assigned JEP106 identification code */
> > >>> -#define ARM_CSPMU_IMPL_ID_NVIDIA             0x36B
> > >>> -
> > >>>    static unsigned long arm_cspmu_cpuhp_state;
> > >>>
> > >>> +/* List of Coresight PMU instances in the system. */
> > >>> +static LIST_HEAD(arm_cspmus);
> > >>> +
> > >>> +/* List of registered vendor backends. */
> > >>> +static LIST_HEAD(arm_cspmu_impls);
> > >>> +
> > >>> +static DEFINE_MUTEX(arm_cspmu_lock);
> > >>> +
> > >>> +/*
> > >>> + * State of the generic driver.
> > >>> + * 0 => registering backend.
> > >>> + * 1 => ready to use.
> > >>> + * 2 or more => in use.
> > >>> + */
> > >>> +#define ARM_CSPMU_STATE_REG  0
> > >>> +#define ARM_CSPMU_STATE_READY        1
> > >>> +static atomic_t arm_cspmu_state;
> > >>> +
> > >>> +static void arm_cspmu_state_ready(void)
> > >>> +{
> > >>> +     atomic_set(&arm_cspmu_state, ARM_CSPMU_STATE_READY);
> > >>> +}
> > >>> +
> > >>> +static bool try_arm_cspmu_state_reg(void)
> > >>> +{
> > >>> +     const int old = ARM_CSPMU_STATE_READY;
> > >>> +     const int new = ARM_CSPMU_STATE_REG;
> > >>> +
> > >>> +     return atomic_cmpxchg(&arm_cspmu_state, old, new) == old;
> > >>> +}
> > >>> +
> > >>> +static bool try_arm_cspmu_state_get(void)
> > >>> +{
> > >>> +     return atomic_inc_not_zero(&arm_cspmu_state);
> > >>> +}
> > >>> +
> > >>> +static void arm_cspmu_state_put(void)
> > >>> +{
> > >>> +     int ret;
> > >>> +
> > >>> +     ret = atomic_dec_if_positive(&arm_cspmu_state);
> > >>> +     WARN_ON(ret < 0);
> > >>> +}
> > >>> +
> > >>
> > >> As long as the vendor module is set for the PMU instance, it won't be
> > >> unloaded as long as there are any perf events and thus the specific
> > >> driver cannot be unloaded. So, you don't need explicit refcount
> > >> maintenance for each pmu callbacks.
> > >>
> > >
> > > The arm_cspmu_state mainly protects during new backend registration
> > when
> > > the device is attached to standard implementation. All devices are attached
> > to
> > > standard implementation initially when arm_cspmu module is loaded,
> since
> > there
> > > is no backend yet. On backend registration, the standard impl is replaced by
> > > backend impl. However, the module unloading mechanism doesn't provide
> > > protection because standard impl is owned by arm_cspmu module, which
> > > is not unloaded during registration.
> > >
> > > The refcount may not be required if the devices are not attached to
> standard
> > > Implementation by default, and the unreg doesn't fallback to it. But that
> > makes
> > > the devices usable only when there is a vendor backend available.
> >
> > Ok, thanks for the explanation. But I still think we :
> >
> >   - Don't need a single global refcount for all the PMUs. Instead this
> >     could be per PMU instance (arm_cspmu), only when it is backed by
> 
> Ok, we can add refcount per PMU.
> 
> >     "generic" backend, others get module refcount. If the refcount of
> >     "generic" PMU is positive, during the registration of a matching
> >     backend driver, we could simply mark that as pending reprobe.
> >
> >   - And only do the refcount for the following call backs:
> >
> >    pmu:: event_init -> hold the refcount
> >    pmu:: destroy -> drop the refcount and trigger a reprobe if one was
> >          pending (see above)
> 
> Is it safe to reprobe on destroy ? The reprobe will free and reallocate
> the memory managed by the device.

I checked the _free_event function from kernel/events/core.c. There are other
cleanups after the event->destroy is called, which may touch a stale data after a
reprobe occurs.
This would suggest reprobing right after event->destroy may not be safe, whether
it's a deferred reprobe or immediate reprobe from arm_cspmu_impl_register.

The alternative of not attaching the device to standard implementation by default
seems to be the better one so far.

Do you have other suggestion ?

> 
> Thanks,
> Besar

Robin Murphy April 27, 2023, 12:20 p.m. UTC | #6

On 2023-04-18 07:20, Besar Wicaksono wrote:
> Arm Coresight PMU driver consists of main standard code and vendor
> backend code. Both are currently built as a single module.
> This patch adds vendor registration API to separate the two to
> keep things modular. Vendor module shall register to the main
> module on loading and trigger device reprobe.

I think it might be considerably cleaner and safer if the main driver 
retained at least some knowledge of the PMIIDR matches and used those to 
explicity request the relevant module. Otherwise, not only is there an 
awful lot of fiddly complexity here, but there's also quite a burden on 
the user to know which modules they have to load to get full 
functionality on any given system.

FYI I've just started working on adding devicetree support, and I do 
need the generic architectural functionality to keep working in the 
absence of any imp-def backend.

Thanks,
Robin.

> 
> Signed-off-by: Besar Wicaksono <bwicaksono@nvidia.com>
> ---
> 
> Changes from v1:
>   * Added separate Kconfig entry for nvidia backend
>   * Added lock to protect accesses to the lists
>   * Added support for matching subset devices from a vendor
>   * Added state tracking to avoid reprobe when a device is in use
> v1: ttps://lore.kernel.org/linux-arm-kernel/20230403163905.20354-1-bwicaksono@nvidia.com/T/#u
> 
> ---
>   drivers/perf/arm_cspmu/Kconfig        |   9 +-
>   drivers/perf/arm_cspmu/Makefile       |   6 +-
>   drivers/perf/arm_cspmu/arm_cspmu.c    | 280 +++++++++++++++++++++++---
>   drivers/perf/arm_cspmu/arm_cspmu.h    |  32 ++-
>   drivers/perf/arm_cspmu/nvidia_cspmu.c |  39 +++-
>   drivers/perf/arm_cspmu/nvidia_cspmu.h |  17 --
>   6 files changed, 325 insertions(+), 58 deletions(-)
>   delete mode 100644 drivers/perf/arm_cspmu/nvidia_cspmu.h
> 
> diff --git a/drivers/perf/arm_cspmu/Kconfig b/drivers/perf/arm_cspmu/Kconfig
> index 0b316fe69a45..8ce7b45a0075 100644
> --- a/drivers/perf/arm_cspmu/Kconfig
> +++ b/drivers/perf/arm_cspmu/Kconfig
> @@ -1,6 +1,6 @@
>   # SPDX-License-Identifier: GPL-2.0
>   #
> -# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
> +# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
>   
>   config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
>   	tristate "ARM Coresight Architecture PMU"
> @@ -11,3 +11,10 @@ config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
>   	  based on ARM CoreSight PMU architecture. Note that this PMU
>   	  architecture does not have relationship with the ARM CoreSight
>   	  Self-Hosted Tracing.
> +
> +config NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU
> +	tristate "NVIDIA Coresight Architecture PMU"
> +	depends on ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
> +	help
> +	  Provides NVIDIA specific attributes for performance monitoring unit
> +	  (PMU) devices based on ARM CoreSight PMU architecture.
> diff --git a/drivers/perf/arm_cspmu/Makefile b/drivers/perf/arm_cspmu/Makefile
> index fedb17df982d..f8ae22411d59 100644
> --- a/drivers/perf/arm_cspmu/Makefile
> +++ b/drivers/perf/arm_cspmu/Makefile
> @@ -1,6 +1,6 @@
> -# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
> +# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
>   #
>   # SPDX-License-Identifier: GPL-2.0
>   
> -obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu_module.o
> -arm_cspmu_module-y := arm_cspmu.o nvidia_cspmu.o
> +obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu.o
> +obj-$(CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += nvidia_cspmu.o
> diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c
> index e31302ab7e37..c55ea2b74454 100644
> --- a/drivers/perf/arm_cspmu/arm_cspmu.c
> +++ b/drivers/perf/arm_cspmu/arm_cspmu.c
> @@ -16,7 +16,7 @@
>    * The user should refer to the vendor technical documentation to get details
>    * about the supported events.
>    *
> - * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
> + * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
>    *
>    */
>   
> @@ -25,13 +25,14 @@
>   #include <linux/ctype.h>
>   #include <linux/interrupt.h>
>   #include <linux/io-64-nonatomic-lo-hi.h>
> +#include <linux/list.h>
>   #include <linux/module.h>
> +#include <linux/mutex.h>
>   #include <linux/perf_event.h>
>   #include <linux/platform_device.h>
>   #include <acpi/processor.h>
>   
>   #include "arm_cspmu.h"
> -#include "nvidia_cspmu.h"
>   
>   #define PMUNAME "arm_cspmu"
>   #define DRVNAME "arm-cs-arch-pmu"
> @@ -117,11 +118,52 @@
>    */
>   #define HILOHI_MAX_POLL	1000
>   
> -/* JEDEC-assigned JEP106 identification code */
> -#define ARM_CSPMU_IMPL_ID_NVIDIA		0x36B
> -
>   static unsigned long arm_cspmu_cpuhp_state;
>   
> +/* List of Coresight PMU instances in the system. */
> +static LIST_HEAD(arm_cspmus);
> +
> +/* List of registered vendor backends. */
> +static LIST_HEAD(arm_cspmu_impls);
> +
> +static DEFINE_MUTEX(arm_cspmu_lock);
> +
> +/*
> + * State of the generic driver.
> + * 0 => registering backend.
> + * 1 => ready to use.
> + * 2 or more => in use.
> + */
> +#define ARM_CSPMU_STATE_REG	0
> +#define ARM_CSPMU_STATE_READY	1
> +static atomic_t arm_cspmu_state;
> +
> +static void arm_cspmu_state_ready(void)
> +{
> +	atomic_set(&arm_cspmu_state, ARM_CSPMU_STATE_READY);
> +}
> +
> +static bool try_arm_cspmu_state_reg(void)
> +{
> +	const int old = ARM_CSPMU_STATE_READY;
> +	const int new = ARM_CSPMU_STATE_REG;
> +
> +	return atomic_cmpxchg(&arm_cspmu_state, old, new) == old;
> +}
> +
> +static bool try_arm_cspmu_state_get(void)
> +{
> +	return atomic_inc_not_zero(&arm_cspmu_state);
> +}
> +
> +static void arm_cspmu_state_put(void)
> +{
> +	int ret;
> +
> +	ret = atomic_dec_if_positive(&arm_cspmu_state);
> +	WARN_ON(ret < 0);
> +}
> +
>   /*
>    * In CoreSight PMU architecture, all of the MMIO registers are 32-bit except
>    * counter register. The counter register can be implemented as 32-bit or 64-bit
> @@ -380,26 +422,161 @@ static struct attribute_group arm_cspmu_cpumask_attr_group = {
>   };
>   
>   struct impl_match {
> -	u32 pmiidr;
> -	u32 mask;
> -	int (*impl_init_ops)(struct arm_cspmu *cspmu);
> +	struct list_head next;
> +	struct arm_cspmu_impl_param param;
>   };
>   
> -static const struct impl_match impl_match[] = {
> -	{
> -	  .pmiidr = ARM_CSPMU_IMPL_ID_NVIDIA,
> -	  .mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
> -	  .impl_init_ops = nv_cspmu_init_ops
> -	},
> -	{}
> -};
> +static struct arm_cspmu_impl_param to_impl_param(const struct arm_cspmu *cspmu)
> +{
> +	struct arm_cspmu_impl_param ret = {0};
> +	u32 pmiidr = cspmu->impl.pmiidr;
> +
> +	ret.impl_id = FIELD_GET(ARM_CSPMU_PMIIDR_IMPLEMENTER, pmiidr);
> +	ret.pvr = FIELD_GET(ARM_CSPMU_PMIIDR_PVR, pmiidr);
> +	ret.pvr_mask = GENMASK(31, 0);
> +
> +	return ret;
> +}
> +
> +static bool impl_param_match(const struct arm_cspmu_impl_param *A,
> +			     const struct arm_cspmu_impl_param *B)
> +{
> +	/*
> +	 * Match criteria:
> +	 * - Implementer id should match.
> +	 * - A's device id is within B's range, or vice versa. This allows
> +	 *   vendor to register backend for a range of devices.
> +	 */
> +	if ((A->impl_id == B->impl_id) &&
> +	    (((A->pvr & A->pvr_mask) == (B->pvr & A->pvr_mask)) ||
> +	     ((A->pvr & B->pvr_mask) == (B->pvr & B->pvr_mask))))
> +		return true;
> +
> +	return false;
> +}
> +
> +static struct impl_match *impl_match_find(
> +	const struct arm_cspmu_impl_param *impl_param)
> +{
> +	struct impl_match *impl_match;
> +
> +	list_for_each_entry(impl_match, &arm_cspmu_impls, next) {
> +		if (impl_param_match(impl_param, &impl_match->param))
> +			return impl_match;
> +	}
> +
> +	return NULL;
> +}
> +
> +static int arm_cspmu_impl_reprobe(
> +	const struct arm_cspmu_impl_param *impl_param)
> +{
> +	struct arm_cspmu *cspmu, *temp;
> +	LIST_HEAD(reprobe_list);
> +	int ret = 0;
> +
> +	mutex_lock(&arm_cspmu_lock);
> +
> +	/* Move the matching devices to temp list to avoid recursive lock. */
> +	list_for_each_entry_safe(cspmu, temp, &arm_cspmus, next) {
> +		struct arm_cspmu_impl_param match_param = to_impl_param(cspmu);
> +
> +		if (impl_param_match(impl_param, &match_param))
> +			list_move(&cspmu->next, &reprobe_list);
> +	}
> +
> +	mutex_unlock(&arm_cspmu_lock);
> +
> +	/* Reprobe the devices. */
> +	list_for_each_entry_safe(cspmu, temp, &reprobe_list, next) {
> +		ret = device_reprobe(cspmu->dev);
> +		if (ret) {
> +			pr_err("arm_cspmu fail reprobe err: %d\n", ret);
> +			return ret;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +int arm_cspmu_impl_register(const struct arm_cspmu_impl_param *impl_param)
> +{
> +	struct impl_match *match;
> +	int ret = 0;
> +
> +	if (!try_arm_cspmu_state_reg()) {
> +		pr_err("arm_cspmu reg failed, device(s) is in use\n");
> +		return -EBUSY;
> +	}
> +
> +	mutex_lock(&arm_cspmu_lock);
> +
> +	match = impl_match_find(impl_param);
> +	if (match) {
> +		pr_err("arm_cspmu reg failed, impl: 0x%x, pvr: 0x%x, pvr_mask: 0x%x already exists\n",
> +			match->param.impl_id, match->param.pvr,
> +			match->param.pvr_mask);
> +		mutex_unlock(&arm_cspmu_lock);
> +		arm_cspmu_state_ready();
> +		return -EINVAL;
> +	}
> +
> +	match = kzalloc(sizeof(struct impl_match), GFP_KERNEL);
> +	if (!match) {
> +		mutex_unlock(&arm_cspmu_lock);
> +		arm_cspmu_state_ready();
> +		return -ENOMEM;
> +	}
> +
> +	memcpy(&match->param, impl_param, sizeof(match->param));
> +	list_add(&match->next, &arm_cspmu_impls);
> +
> +	mutex_unlock(&arm_cspmu_lock);
> +
> +	/* Replace generic backend with vendor implementation. */
> +	ret = arm_cspmu_impl_reprobe(impl_param);
> +
> +	if (ret)
> +		arm_cspmu_impl_unregister(impl_param);
> +
> +	arm_cspmu_state_ready();
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(arm_cspmu_impl_register);
> +
> +void arm_cspmu_impl_unregister(const struct arm_cspmu_impl_param *impl_param)
> +{
> +	struct impl_match *match;
> +
> +	mutex_lock(&arm_cspmu_lock);
> +
> +	match = impl_match_find(impl_param);
> +	if (!match) {
> +		pr_err("arm_cspmu unreg failed, unable to find impl: 0x%x, pvr: 0x%x, pvr_mask: 0x%x\n",
> +			impl_param->impl_id, impl_param->pvr,
> +			impl_param->pvr_mask);
> +		mutex_unlock(&arm_cspmu_lock);
> +		return;
> +	}
> +
> +	list_del(&match->next);
> +	kfree(match);
> +
> +	mutex_unlock(&arm_cspmu_lock);
> +
> +	/* Re-attach devices to standard driver. */
> +	arm_cspmu_impl_reprobe(impl_param);
> +}
> +EXPORT_SYMBOL_GPL(arm_cspmu_impl_unregister);
>   
>   static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
>   {
> -	int ret;
> +	int ret = 0;
>   	struct acpi_apmt_node *apmt_node = cspmu->apmt_node;
>   	struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
> -	const struct impl_match *match = impl_match;
> +	struct arm_cspmu_impl_param match_param = {0};
> +	const struct impl_match *match;
>   
>   	/*
>   	 * Get PMU implementer and product id from APMT node.
> @@ -410,19 +587,23 @@ static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
>   		(apmt_node->impl_id) ? apmt_node->impl_id :
>   				       readl(cspmu->base0 + PMIIDR);
>   
> -	/* Find implementer specific attribute ops. */
> -	for (; match->pmiidr; match++) {
> -		const u32 mask = match->mask;
> +	cspmu->impl.module = THIS_MODULE;
>   
> -		if ((match->pmiidr & mask) == (cspmu->impl.pmiidr & mask)) {
> -			ret = match->impl_init_ops(cspmu);
> -			if (ret)
> -				return ret;
> +	mutex_lock(&arm_cspmu_lock);
>   
> -			break;
> -		}
> +	/* Find implementer specific attribute ops. */
> +	match_param = to_impl_param(cspmu);
> +	match = impl_match_find(&match_param);
> +	if (match) {
> +		cspmu->impl.module = match->param.module;
> +		ret = match->param.impl_init_ops(cspmu);
>   	}
>   
> +	mutex_unlock(&arm_cspmu_lock);
> +
> +	if (ret)
> +		return ret;
> +
>   	/* Use default callbacks if implementer doesn't provide one. */
>   	CHECK_DEFAULT_IMPL_OPS(impl_ops, get_event_attrs);
>   	CHECK_DEFAULT_IMPL_OPS(impl_ops, get_format_attrs);
> @@ -639,6 +820,11 @@ static int arm_cspmu_event_init(struct perf_event *event)
>   	struct arm_cspmu *cspmu;
>   	struct hw_perf_event *hwc = &event->hw;
>   
> +	if (!try_arm_cspmu_state_get()) {
> +		pr_err("arm_cspmu event_init fail: driver is reprobing\n");
> +		return -EBUSY;
> +	}
> +
>   	cspmu = to_arm_cspmu(event->pmu);
>   
>   	/*
> @@ -648,12 +834,14 @@ static int arm_cspmu_event_init(struct perf_event *event)
>   	if (is_sampling_event(event)) {
>   		dev_dbg(cspmu->pmu.dev,
>   			"Can't support sampling events\n");
> +		arm_cspmu_state_put();
>   		return -EOPNOTSUPP;
>   	}
>   
>   	if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK) {
>   		dev_dbg(cspmu->pmu.dev,
>   			"Can't support per-task counters\n");
> +		arm_cspmu_state_put();
>   		return -EINVAL;
>   	}
>   
> @@ -664,16 +852,21 @@ static int arm_cspmu_event_init(struct perf_event *event)
>   	if (!cpumask_test_cpu(event->cpu, &cspmu->associated_cpus)) {
>   		dev_dbg(cspmu->pmu.dev,
>   			"Requested cpu is not associated with the PMU\n");
> +		arm_cspmu_state_put();
>   		return -EINVAL;
>   	}
>   
>   	/* Enforce the current active CPU to handle the events in this PMU. */
>   	event->cpu = cpumask_first(&cspmu->active_cpu);
> -	if (event->cpu >= nr_cpu_ids)
> +	if (event->cpu >= nr_cpu_ids) {
> +		arm_cspmu_state_put();
>   		return -EINVAL;
> +	}
>   
> -	if (!arm_cspmu_validate_group(event))
> +	if (!arm_cspmu_validate_group(event)) {
> +		arm_cspmu_state_put();
>   		return -EINVAL;
> +	}
>   
>   	/*
>   	 * The logical counter id is tracked with hw_perf_event.extra_reg.idx.
> @@ -686,6 +879,8 @@ static int arm_cspmu_event_init(struct perf_event *event)
>   	hwc->extra_reg.idx = -1;
>   	hwc->config = cspmu->impl.ops.event_type(event);
>   
> +	arm_cspmu_state_put();
> +
>   	return 0;
>   }
>   
> @@ -864,13 +1059,22 @@ static int arm_cspmu_add(struct perf_event *event, int flags)
>   	struct hw_perf_event *hwc = &event->hw;
>   	int idx;
>   
> +	if (!try_arm_cspmu_state_get()) {
> +		pr_err("arm_cspmu event_init fail: driver is reprobing\n");
> +		return -EBUSY;
> +	}
> +
>   	if (WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(),
> -					   &cspmu->associated_cpus)))
> +					   &cspmu->associated_cpus))) {
> +		arm_cspmu_state_put();
>   		return -ENOENT;
> +	}
>   
>   	idx = arm_cspmu_get_event_idx(hw_events, event);
> -	if (idx < 0)
> +	if (idx < 0) {
> +		arm_cspmu_state_put();
>   		return idx;
> +	}
>   
>   	hw_events->events[idx] = event;
>   	hwc->idx = to_phys_idx(cspmu, idx);
> @@ -900,6 +1104,8 @@ static void arm_cspmu_del(struct perf_event *event, int flags)
>   	clear_bit(idx, hw_events->used_ctrs);
>   
>   	perf_event_update_userpage(event);
> +
> +	arm_cspmu_state_put();
>   }
>   
>   static void arm_cspmu_read(struct perf_event *event)
> @@ -1154,7 +1360,7 @@ static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu)
>   
>   	cspmu->pmu = (struct pmu){
>   		.task_ctx_nr	= perf_invalid_context,
> -		.module		= THIS_MODULE,
> +		.module		= cspmu->impl.module,
>   		.pmu_enable	= arm_cspmu_enable,
>   		.pmu_disable	= arm_cspmu_disable,
>   		.event_init	= arm_cspmu_event_init,
> @@ -1205,6 +1411,10 @@ static int arm_cspmu_device_probe(struct platform_device *pdev)
>   	if (ret)
>   		return ret;
>   
> +	mutex_lock(&arm_cspmu_lock);
> +	list_add(&cspmu->next, &arm_cspmus);
> +	mutex_unlock(&arm_cspmu_lock);
> +
>   	return 0;
>   }
>   
> @@ -1212,6 +1422,10 @@ static int arm_cspmu_device_remove(struct platform_device *pdev)
>   {
>   	struct arm_cspmu *cspmu = platform_get_drvdata(pdev);
>   
> +	mutex_lock(&arm_cspmu_lock);
> +	list_del(&cspmu->next);
> +	mutex_unlock(&arm_cspmu_lock);
> +
>   	perf_pmu_unregister(&cspmu->pmu);
>   	cpuhp_state_remove_instance(arm_cspmu_cpuhp_state, &cspmu->cpuhp_node);
>   
> @@ -1281,6 +1495,8 @@ static int __init arm_cspmu_init(void)
>   {
>   	int ret;
>   
> +	arm_cspmu_state_ready();
> +
>   	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
>   					"perf/arm/cspmu:online",
>   					arm_cspmu_cpu_online,
> diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h
> index 51323b175a4a..cf3458d9fc63 100644
> --- a/drivers/perf/arm_cspmu/arm_cspmu.h
> +++ b/drivers/perf/arm_cspmu/arm_cspmu.h
> @@ -1,7 +1,7 @@
>   /* SPDX-License-Identifier: GPL-2.0
>    *
>    * ARM CoreSight Architecture PMU driver.
> - * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
> + * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
>    *
>    */
>   
> @@ -68,7 +68,10 @@
>   
>   /* PMIIDR register field */
>   #define ARM_CSPMU_PMIIDR_IMPLEMENTER	GENMASK(11, 0)
> +#define ARM_CSPMU_PMIIDR_REVISION	GENMASK(15, 12)
> +#define ARM_CSPMU_PMIIDR_VARIANT	GENMASK(19, 16)
>   #define ARM_CSPMU_PMIIDR_PRODUCTID	GENMASK(31, 20)
> +#define ARM_CSPMU_PMIIDR_PVR		GENMASK(31, 12)
>   
>   struct arm_cspmu;
>   
> @@ -107,15 +110,36 @@ struct arm_cspmu_impl_ops {
>   					 struct attribute *attr, int unused);
>   };
>   
> +/* Vendor/implementer registration parameter. */
> +struct arm_cspmu_impl_param {
> +	/* JEDEC assigned implementer id of the vendor. */
> +	u32 impl_id;
> +	/*
> +	 * The pvr value and mask describes the device ids covered by the
> +	 * vendor backend. pvr contains the pattern of acceptable product,
> +	 * variant, and revision bits from device's PMIIDR. pvr_mask contains
> +	 * the relevant bits when comparing pvr. 0 value on the mask means any
> +	 * pvr value is supported.
> +	 */
> +	u32 pvr;
> +	u32 pvr_mask;
> +	/* Backend module. */
> +	struct module *module;
> +	/* Callback to vendor backend to init arm_cspmu_impl::ops. */
> +	int (*impl_init_ops)(struct arm_cspmu *cspmu);
> +};
> +
>   /* Vendor/implementer descriptor. */
>   struct arm_cspmu_impl {
>   	u32 pmiidr;
>   	struct arm_cspmu_impl_ops ops;
> +	struct module *module;
>   	void *ctx;
>   };
>   
>   /* Coresight PMU descriptor. */
>   struct arm_cspmu {
> +	struct list_head next;
>   	struct pmu pmu;
>   	struct device *dev;
>   	struct acpi_apmt_node *apmt_node;
> @@ -148,4 +172,10 @@ ssize_t arm_cspmu_sysfs_format_show(struct device *dev,
>   				    struct device_attribute *attr,
>   				    char *buf);
>   
> +/* Register vendor backend. */
> +int arm_cspmu_impl_register(const struct arm_cspmu_impl_param *impl_param);
> +
> +/* Unregister vendor backend. */
> +void arm_cspmu_impl_unregister(const struct arm_cspmu_impl_param *impl_param);
> +
>   #endif /* __ARM_CSPMU_H__ */
> diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c
> index 72ef80caa3c8..c179849ca893 100644
> --- a/drivers/perf/arm_cspmu/nvidia_cspmu.c
> +++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c
> @@ -1,14 +1,18 @@
>   // SPDX-License-Identifier: GPL-2.0
>   /*
> - * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
> + * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
>    *
>    */
>   
>   /* Support for NVIDIA specific attributes. */
>   
> +#include <linux/module.h>
>   #include <linux/topology.h>
>   
> -#include "nvidia_cspmu.h"
> +#include "arm_cspmu.h"
> +
> +/* JEDEC-assigned JEP106 identification code */
> +#define ARM_CSPMU_IMPL_ID_NVIDIA     0x36B
>   
>   #define NV_PCIE_PORT_COUNT           10ULL
>   #define NV_PCIE_FILTER_ID_MASK       GENMASK_ULL(NV_PCIE_PORT_COUNT - 1, 0)
> @@ -351,7 +355,7 @@ static char *nv_cspmu_format_name(const struct arm_cspmu *cspmu,
>   	return name;
>   }
>   
> -int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
> +static int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
>   {
>   	u32 prodid;
>   	struct nv_cspmu_ctx *ctx;
> @@ -395,6 +399,33 @@ int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
>   
>   	return 0;
>   }
> -EXPORT_SYMBOL_GPL(nv_cspmu_init_ops);
> +
> +/* Match all NVIDIA Coresight PMU devices */
> +static const struct arm_cspmu_impl_param nv_cspmu_param = {
> +	.module		= THIS_MODULE,
> +	.impl_id	= ARM_CSPMU_IMPL_ID_NVIDIA,
> +	.pvr		= 0,
> +	.pvr_mask	= 0,
> +	.impl_init_ops	= nv_cspmu_init_ops
> +};
> +
> +static int __init nvidia_cspmu_init(void)
> +{
> +	int ret;
> +
> +	ret = arm_cspmu_impl_register(&nv_cspmu_param);
> +	if (ret)
> +		pr_err("nvidia_cspmu backend registration error: %d\n", ret);
> +
> +	return ret;
> +}
> +
> +static void __exit nvidia_cspmu_exit(void)
> +{
> +	arm_cspmu_impl_unregister(&nv_cspmu_param);
> +}
> +
> +module_init(nvidia_cspmu_init);
> +module_exit(nvidia_cspmu_exit);
>   
>   MODULE_LICENSE("GPL v2");
> diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.h b/drivers/perf/arm_cspmu/nvidia_cspmu.h
> deleted file mode 100644
> index 71e18f0dc50b..000000000000
> --- a/drivers/perf/arm_cspmu/nvidia_cspmu.h
> +++ /dev/null
> @@ -1,17 +0,0 @@
> -/* SPDX-License-Identifier: GPL-2.0
> - *
> - * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
> - *
> - */
> -
> -/* Support for NVIDIA specific attributes. */
> -
> -#ifndef __NVIDIA_CSPMU_H__
> -#define __NVIDIA_CSPMU_H__
> -
> -#include "arm_cspmu.h"
> -
> -/* Allocate NVIDIA descriptor. */
> -int nv_cspmu_init_ops(struct arm_cspmu *cspmu);
> -
> -#endif /* __NVIDIA_CSPMU_H__ */
> 
> base-commit: 73f2c2a7e1d2b31fdd5faa6dfa151c437a6c0a5a
> prerequisite-patch-id: fb691dc01d87597bcbaa4d352073304287c20f73

Besar Wicaksono April 28, 2023, 10:23 p.m. UTC | #7

Hi Robin and Suzuki,

> -----Original Message-----
> From: Robin Murphy <robin.murphy@arm.com>
> Sent: Thursday, April 27, 2023 7:21 AM
> To: Besar Wicaksono <bwicaksono@nvidia.com>; suzuki.poulose@arm.com;
> catalin.marinas@arm.com; will@kernel.org; mark.rutland@arm.com
> Cc: linux-arm-kernel@lists.infradead.org; linux-kernel@vger.kernel.org; linux-
> tegra@vger.kernel.org; Thierry Reding <treding@nvidia.com>; Jonathan
> Hunter <jonathanh@nvidia.com>; Vikram Sethi <vsethi@nvidia.com>; Richard
> Wiley <rwiley@nvidia.com>; Eric Funsten <efunsten@nvidia.com>
> Subject: Re: [PATCH v2] perf: arm_cspmu: Separate Arm and vendor module
> 
> External email: Use caution opening links or attachments
> 
> 
> On 2023-04-18 07:20, Besar Wicaksono wrote:
> > Arm Coresight PMU driver consists of main standard code and vendor
> > backend code. Both are currently built as a single module.
> > This patch adds vendor registration API to separate the two to
> > keep things modular. Vendor module shall register to the main
> > module on loading and trigger device reprobe.
> 
> I think it might be considerably cleaner and safer if the main driver
> retained at least some knowledge of the PMIIDR matches and used those to
> explicity request the relevant module. Otherwise, not only is there an
> awful lot of fiddly complexity here, but there's also quite a burden on
> the user to know which modules they have to load to get full
> functionality on any given system.

Do you mean like keep the existing match table as a whitelist, and associate 
each entry with the backend module name to load it from the main driver ?

> 
> FYI I've just started working on adding devicetree support, and I do
> need the generic architectural functionality to keep working in the
> absence of any imp-def backend.

W.r.t the reprobe discussion with Suzuki, this would mean the expected
behavior is to attach the device to standard imp as fallback/default.
Suzuki, my preference is not supporting delayed reprobe on event->destroy
due to the potential access to stale data. We should just fail the backend
registration if one of the device is in use.

Thank you for working on the devicetree support, please let us know
if we could help.

Regards,
Besar

> 
> Thanks,
> Robin.
> 
> >
> > Signed-off-by: Besar Wicaksono <bwicaksono@nvidia.com>
> > ---
> >
> > Changes from v1:
> >   * Added separate Kconfig entry for nvidia backend
> >   * Added lock to protect accesses to the lists
> >   * Added support for matching subset devices from a vendor
> >   * Added state tracking to avoid reprobe when a device is in use
> > v1: ttps://lore.kernel.org/linux-arm-kernel/20230403163905.20354-1-
> bwicaksono@nvidia.com/T/#u
> >
> > ---
> >   drivers/perf/arm_cspmu/Kconfig        |   9 +-
> >   drivers/perf/arm_cspmu/Makefile       |   6 +-
> >   drivers/perf/arm_cspmu/arm_cspmu.c    | 280
> +++++++++++++++++++++++---
> >   drivers/perf/arm_cspmu/arm_cspmu.h    |  32 ++-
> >   drivers/perf/arm_cspmu/nvidia_cspmu.c |  39 +++-
> >   drivers/perf/arm_cspmu/nvidia_cspmu.h |  17 --
> >   6 files changed, 325 insertions(+), 58 deletions(-)
> >   delete mode 100644 drivers/perf/arm_cspmu/nvidia_cspmu.h
> >
> > diff --git a/drivers/perf/arm_cspmu/Kconfig
> b/drivers/perf/arm_cspmu/Kconfig
> > index 0b316fe69a45..8ce7b45a0075 100644
> > --- a/drivers/perf/arm_cspmu/Kconfig
> > +++ b/drivers/perf/arm_cspmu/Kconfig
> > @@ -1,6 +1,6 @@
> >   # SPDX-License-Identifier: GPL-2.0
> >   #
> > -# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights
> reserved.
> > +# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights
> reserved.
> >
> >   config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
> >       tristate "ARM Coresight Architecture PMU"
> > @@ -11,3 +11,10 @@ config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
> >         based on ARM CoreSight PMU architecture. Note that this PMU
> >         architecture does not have relationship with the ARM CoreSight
> >         Self-Hosted Tracing.
> > +
> > +config NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU
> > +     tristate "NVIDIA Coresight Architecture PMU"
> > +     depends on ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
> > +     help
> > +       Provides NVIDIA specific attributes for performance monitoring unit
> > +       (PMU) devices based on ARM CoreSight PMU architecture.
> > diff --git a/drivers/perf/arm_cspmu/Makefile
> b/drivers/perf/arm_cspmu/Makefile
> > index fedb17df982d..f8ae22411d59 100644
> > --- a/drivers/perf/arm_cspmu/Makefile
> > +++ b/drivers/perf/arm_cspmu/Makefile
> > @@ -1,6 +1,6 @@
> > -# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights
> reserved.
> > +# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights
> reserved.
> >   #
> >   # SPDX-License-Identifier: GPL-2.0
> >
> > -obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) +=
> arm_cspmu_module.o
> > -arm_cspmu_module-y := arm_cspmu.o nvidia_cspmu.o
> > +obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) +=
> arm_cspmu.o
> > +obj-$(CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU) +=
> nvidia_cspmu.o
> > diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c
> b/drivers/perf/arm_cspmu/arm_cspmu.c
> > index e31302ab7e37..c55ea2b74454 100644
> > --- a/drivers/perf/arm_cspmu/arm_cspmu.c
> > +++ b/drivers/perf/arm_cspmu/arm_cspmu.c
> > @@ -16,7 +16,7 @@
> >    * The user should refer to the vendor technical documentation to get
> details
> >    * about the supported events.
> >    *
> > - * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights
> reserved.
> > + * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights
> reserved.
> >    *
> >    */
> >
> > @@ -25,13 +25,14 @@
> >   #include <linux/ctype.h>
> >   #include <linux/interrupt.h>
> >   #include <linux/io-64-nonatomic-lo-hi.h>
> > +#include <linux/list.h>
> >   #include <linux/module.h>
> > +#include <linux/mutex.h>
> >   #include <linux/perf_event.h>
> >   #include <linux/platform_device.h>
> >   #include <acpi/processor.h>
> >
> >   #include "arm_cspmu.h"
> > -#include "nvidia_cspmu.h"
> >
> >   #define PMUNAME "arm_cspmu"
> >   #define DRVNAME "arm-cs-arch-pmu"
> > @@ -117,11 +118,52 @@
> >    */
> >   #define HILOHI_MAX_POLL     1000
> >
> > -/* JEDEC-assigned JEP106 identification code */
> > -#define ARM_CSPMU_IMPL_ID_NVIDIA             0x36B
> > -
> >   static unsigned long arm_cspmu_cpuhp_state;
> >
> > +/* List of Coresight PMU instances in the system. */
> > +static LIST_HEAD(arm_cspmus);
> > +
> > +/* List of registered vendor backends. */
> > +static LIST_HEAD(arm_cspmu_impls);
> > +
> > +static DEFINE_MUTEX(arm_cspmu_lock);
> > +
> > +/*
> > + * State of the generic driver.
> > + * 0 => registering backend.
> > + * 1 => ready to use.
> > + * 2 or more => in use.
> > + */
> > +#define ARM_CSPMU_STATE_REG  0
> > +#define ARM_CSPMU_STATE_READY        1
> > +static atomic_t arm_cspmu_state;
> > +
> > +static void arm_cspmu_state_ready(void)
> > +{
> > +     atomic_set(&arm_cspmu_state, ARM_CSPMU_STATE_READY);
> > +}
> > +
> > +static bool try_arm_cspmu_state_reg(void)
> > +{
> > +     const int old = ARM_CSPMU_STATE_READY;
> > +     const int new = ARM_CSPMU_STATE_REG;
> > +
> > +     return atomic_cmpxchg(&arm_cspmu_state, old, new) == old;
> > +}
> > +
> > +static bool try_arm_cspmu_state_get(void)
> > +{
> > +     return atomic_inc_not_zero(&arm_cspmu_state);
> > +}
> > +
> > +static void arm_cspmu_state_put(void)
> > +{
> > +     int ret;
> > +
> > +     ret = atomic_dec_if_positive(&arm_cspmu_state);
> > +     WARN_ON(ret < 0);
> > +}
> > +
> >   /*
> >    * In CoreSight PMU architecture, all of the MMIO registers are 32-bit
> except
> >    * counter register. The counter register can be implemented as 32-bit or
> 64-bit
> > @@ -380,26 +422,161 @@ static struct attribute_group
> arm_cspmu_cpumask_attr_group = {
> >   };
> >
> >   struct impl_match {
> > -     u32 pmiidr;
> > -     u32 mask;
> > -     int (*impl_init_ops)(struct arm_cspmu *cspmu);
> > +     struct list_head next;
> > +     struct arm_cspmu_impl_param param;
> >   };
> >
> > -static const struct impl_match impl_match[] = {
> > -     {
> > -       .pmiidr = ARM_CSPMU_IMPL_ID_NVIDIA,
> > -       .mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
> > -       .impl_init_ops = nv_cspmu_init_ops
> > -     },
> > -     {}
> > -};
> > +static struct arm_cspmu_impl_param to_impl_param(const struct
> arm_cspmu *cspmu)
> > +{
> > +     struct arm_cspmu_impl_param ret = {0};
> > +     u32 pmiidr = cspmu->impl.pmiidr;
> > +
> > +     ret.impl_id = FIELD_GET(ARM_CSPMU_PMIIDR_IMPLEMENTER, pmiidr);
> > +     ret.pvr = FIELD_GET(ARM_CSPMU_PMIIDR_PVR, pmiidr);
> > +     ret.pvr_mask = GENMASK(31, 0);
> > +
> > +     return ret;
> > +}
> > +
> > +static bool impl_param_match(const struct arm_cspmu_impl_param *A,
> > +                          const struct arm_cspmu_impl_param *B)
> > +{
> > +     /*
> > +      * Match criteria:
> > +      * - Implementer id should match.
> > +      * - A's device id is within B's range, or vice versa. This allows
> > +      *   vendor to register backend for a range of devices.
> > +      */
> > +     if ((A->impl_id == B->impl_id) &&
> > +         (((A->pvr & A->pvr_mask) == (B->pvr & A->pvr_mask)) ||
> > +          ((A->pvr & B->pvr_mask) == (B->pvr & B->pvr_mask))))
> > +             return true;
> > +
> > +     return false;
> > +}
> > +
> > +static struct impl_match *impl_match_find(
> > +     const struct arm_cspmu_impl_param *impl_param)
> > +{
> > +     struct impl_match *impl_match;
> > +
> > +     list_for_each_entry(impl_match, &arm_cspmu_impls, next) {
> > +             if (impl_param_match(impl_param, &impl_match->param))
> > +                     return impl_match;
> > +     }
> > +
> > +     return NULL;
> > +}
> > +
> > +static int arm_cspmu_impl_reprobe(
> > +     const struct arm_cspmu_impl_param *impl_param)
> > +{
> > +     struct arm_cspmu *cspmu, *temp;
> > +     LIST_HEAD(reprobe_list);
> > +     int ret = 0;
> > +
> > +     mutex_lock(&arm_cspmu_lock);
> > +
> > +     /* Move the matching devices to temp list to avoid recursive lock. */
> > +     list_for_each_entry_safe(cspmu, temp, &arm_cspmus, next) {
> > +             struct arm_cspmu_impl_param match_param =
> to_impl_param(cspmu);
> > +
> > +             if (impl_param_match(impl_param, &match_param))
> > +                     list_move(&cspmu->next, &reprobe_list);
> > +     }
> > +
> > +     mutex_unlock(&arm_cspmu_lock);
> > +
> > +     /* Reprobe the devices. */
> > +     list_for_each_entry_safe(cspmu, temp, &reprobe_list, next) {
> > +             ret = device_reprobe(cspmu->dev);
> > +             if (ret) {
> > +                     pr_err("arm_cspmu fail reprobe err: %d\n", ret);
> > +                     return ret;
> > +             }
> > +     }
> > +
> > +     return 0;
> > +}
> > +
> > +int arm_cspmu_impl_register(const struct arm_cspmu_impl_param
> *impl_param)
> > +{
> > +     struct impl_match *match;
> > +     int ret = 0;
> > +
> > +     if (!try_arm_cspmu_state_reg()) {
> > +             pr_err("arm_cspmu reg failed, device(s) is in use\n");
> > +             return -EBUSY;
> > +     }
> > +
> > +     mutex_lock(&arm_cspmu_lock);
> > +
> > +     match = impl_match_find(impl_param);
> > +     if (match) {
> > +             pr_err("arm_cspmu reg failed, impl: 0x%x, pvr: 0x%x, pvr_mask:
> 0x%x already exists\n",
> > +                     match->param.impl_id, match->param.pvr,
> > +                     match->param.pvr_mask);
> > +             mutex_unlock(&arm_cspmu_lock);
> > +             arm_cspmu_state_ready();
> > +             return -EINVAL;
> > +     }
> > +
> > +     match = kzalloc(sizeof(struct impl_match), GFP_KERNEL);
> > +     if (!match) {
> > +             mutex_unlock(&arm_cspmu_lock);
> > +             arm_cspmu_state_ready();
> > +             return -ENOMEM;
> > +     }
> > +
> > +     memcpy(&match->param, impl_param, sizeof(match->param));
> > +     list_add(&match->next, &arm_cspmu_impls);
> > +
> > +     mutex_unlock(&arm_cspmu_lock);
> > +
> > +     /* Replace generic backend with vendor implementation. */
> > +     ret = arm_cspmu_impl_reprobe(impl_param);
> > +
> > +     if (ret)
> > +             arm_cspmu_impl_unregister(impl_param);
> > +
> > +     arm_cspmu_state_ready();
> > +
> > +     return ret;
> > +}
> > +EXPORT_SYMBOL_GPL(arm_cspmu_impl_register);
> > +
> > +void arm_cspmu_impl_unregister(const struct arm_cspmu_impl_param
> *impl_param)
> > +{
> > +     struct impl_match *match;
> > +
> > +     mutex_lock(&arm_cspmu_lock);
> > +
> > +     match = impl_match_find(impl_param);
> > +     if (!match) {
> > +             pr_err("arm_cspmu unreg failed, unable to find impl: 0x%x, pvr:
> 0x%x, pvr_mask: 0x%x\n",
> > +                     impl_param->impl_id, impl_param->pvr,
> > +                     impl_param->pvr_mask);
> > +             mutex_unlock(&arm_cspmu_lock);
> > +             return;
> > +     }
> > +
> > +     list_del(&match->next);
> > +     kfree(match);
> > +
> > +     mutex_unlock(&arm_cspmu_lock);
> > +
> > +     /* Re-attach devices to standard driver. */
> > +     arm_cspmu_impl_reprobe(impl_param);
> > +}
> > +EXPORT_SYMBOL_GPL(arm_cspmu_impl_unregister);
> >
> >   static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
> >   {
> > -     int ret;
> > +     int ret = 0;
> >       struct acpi_apmt_node *apmt_node = cspmu->apmt_node;
> >       struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
> > -     const struct impl_match *match = impl_match;
> > +     struct arm_cspmu_impl_param match_param = {0};
> > +     const struct impl_match *match;
> >
> >       /*
> >        * Get PMU implementer and product id from APMT node.
> > @@ -410,19 +587,23 @@ static int arm_cspmu_init_impl_ops(struct
> arm_cspmu *cspmu)
> >               (apmt_node->impl_id) ? apmt_node->impl_id :
> >                                      readl(cspmu->base0 + PMIIDR);
> >
> > -     /* Find implementer specific attribute ops. */
> > -     for (; match->pmiidr; match++) {
> > -             const u32 mask = match->mask;
> > +     cspmu->impl.module = THIS_MODULE;
> >
> > -             if ((match->pmiidr & mask) == (cspmu->impl.pmiidr & mask)) {
> > -                     ret = match->impl_init_ops(cspmu);
> > -                     if (ret)
> > -                             return ret;
> > +     mutex_lock(&arm_cspmu_lock);
> >
> > -                     break;
> > -             }
> > +     /* Find implementer specific attribute ops. */
> > +     match_param = to_impl_param(cspmu);
> > +     match = impl_match_find(&match_param);
> > +     if (match) {
> > +             cspmu->impl.module = match->param.module;
> > +             ret = match->param.impl_init_ops(cspmu);
> >       }
> >
> > +     mutex_unlock(&arm_cspmu_lock);
> > +
> > +     if (ret)
> > +             return ret;
> > +
> >       /* Use default callbacks if implementer doesn't provide one. */
> >       CHECK_DEFAULT_IMPL_OPS(impl_ops, get_event_attrs);
> >       CHECK_DEFAULT_IMPL_OPS(impl_ops, get_format_attrs);
> > @@ -639,6 +820,11 @@ static int arm_cspmu_event_init(struct perf_event
> *event)
> >       struct arm_cspmu *cspmu;
> >       struct hw_perf_event *hwc = &event->hw;
> >
> > +     if (!try_arm_cspmu_state_get()) {
> > +             pr_err("arm_cspmu event_init fail: driver is reprobing\n");
> > +             return -EBUSY;
> > +     }
> > +
> >       cspmu = to_arm_cspmu(event->pmu);
> >
> >       /*
> > @@ -648,12 +834,14 @@ static int arm_cspmu_event_init(struct
> perf_event *event)
> >       if (is_sampling_event(event)) {
> >               dev_dbg(cspmu->pmu.dev,
> >                       "Can't support sampling events\n");
> > +             arm_cspmu_state_put();
> >               return -EOPNOTSUPP;
> >       }
> >
> >       if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK) {
> >               dev_dbg(cspmu->pmu.dev,
> >                       "Can't support per-task counters\n");
> > +             arm_cspmu_state_put();
> >               return -EINVAL;
> >       }
> >
> > @@ -664,16 +852,21 @@ static int arm_cspmu_event_init(struct
> perf_event *event)
> >       if (!cpumask_test_cpu(event->cpu, &cspmu->associated_cpus)) {
> >               dev_dbg(cspmu->pmu.dev,
> >                       "Requested cpu is not associated with the PMU\n");
> > +             arm_cspmu_state_put();
> >               return -EINVAL;
> >       }
> >
> >       /* Enforce the current active CPU to handle the events in this PMU. */
> >       event->cpu = cpumask_first(&cspmu->active_cpu);
> > -     if (event->cpu >= nr_cpu_ids)
> > +     if (event->cpu >= nr_cpu_ids) {
> > +             arm_cspmu_state_put();
> >               return -EINVAL;
> > +     }
> >
> > -     if (!arm_cspmu_validate_group(event))
> > +     if (!arm_cspmu_validate_group(event)) {
> > +             arm_cspmu_state_put();
> >               return -EINVAL;
> > +     }
> >
> >       /*
> >        * The logical counter id is tracked with hw_perf_event.extra_reg.idx.
> > @@ -686,6 +879,8 @@ static int arm_cspmu_event_init(struct perf_event
> *event)
> >       hwc->extra_reg.idx = -1;
> >       hwc->config = cspmu->impl.ops.event_type(event);
> >
> > +     arm_cspmu_state_put();
> > +
> >       return 0;
> >   }
> >
> > @@ -864,13 +1059,22 @@ static int arm_cspmu_add(struct perf_event
> *event, int flags)
> >       struct hw_perf_event *hwc = &event->hw;
> >       int idx;
> >
> > +     if (!try_arm_cspmu_state_get()) {
> > +             pr_err("arm_cspmu event_init fail: driver is reprobing\n");
> > +             return -EBUSY;
> > +     }
> > +
> >       if (WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(),
> > -                                        &cspmu->associated_cpus)))
> > +                                        &cspmu->associated_cpus))) {
> > +             arm_cspmu_state_put();
> >               return -ENOENT;
> > +     }
> >
> >       idx = arm_cspmu_get_event_idx(hw_events, event);
> > -     if (idx < 0)
> > +     if (idx < 0) {
> > +             arm_cspmu_state_put();
> >               return idx;
> > +     }
> >
> >       hw_events->events[idx] = event;
> >       hwc->idx = to_phys_idx(cspmu, idx);
> > @@ -900,6 +1104,8 @@ static void arm_cspmu_del(struct perf_event
> *event, int flags)
> >       clear_bit(idx, hw_events->used_ctrs);
> >
> >       perf_event_update_userpage(event);
> > +
> > +     arm_cspmu_state_put();
> >   }
> >
> >   static void arm_cspmu_read(struct perf_event *event)
> > @@ -1154,7 +1360,7 @@ static int arm_cspmu_register_pmu(struct
> arm_cspmu *cspmu)
> >
> >       cspmu->pmu = (struct pmu){
> >               .task_ctx_nr    = perf_invalid_context,
> > -             .module         = THIS_MODULE,
> > +             .module         = cspmu->impl.module,
> >               .pmu_enable     = arm_cspmu_enable,
> >               .pmu_disable    = arm_cspmu_disable,
> >               .event_init     = arm_cspmu_event_init,
> > @@ -1205,6 +1411,10 @@ static int arm_cspmu_device_probe(struct
> platform_device *pdev)
> >       if (ret)
> >               return ret;
> >
> > +     mutex_lock(&arm_cspmu_lock);
> > +     list_add(&cspmu->next, &arm_cspmus);
> > +     mutex_unlock(&arm_cspmu_lock);
> > +
> >       return 0;
> >   }
> >
> > @@ -1212,6 +1422,10 @@ static int arm_cspmu_device_remove(struct
> platform_device *pdev)
> >   {
> >       struct arm_cspmu *cspmu = platform_get_drvdata(pdev);
> >
> > +     mutex_lock(&arm_cspmu_lock);
> > +     list_del(&cspmu->next);
> > +     mutex_unlock(&arm_cspmu_lock);
> > +
> >       perf_pmu_unregister(&cspmu->pmu);
> >       cpuhp_state_remove_instance(arm_cspmu_cpuhp_state, &cspmu-
> >cpuhp_node);
> >
> > @@ -1281,6 +1495,8 @@ static int __init arm_cspmu_init(void)
> >   {
> >       int ret;
> >
> > +     arm_cspmu_state_ready();
> > +
> >       ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
> >                                       "perf/arm/cspmu:online",
> >                                       arm_cspmu_cpu_online,
> > diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h
> b/drivers/perf/arm_cspmu/arm_cspmu.h
> > index 51323b175a4a..cf3458d9fc63 100644
> > --- a/drivers/perf/arm_cspmu/arm_cspmu.h
> > +++ b/drivers/perf/arm_cspmu/arm_cspmu.h
> > @@ -1,7 +1,7 @@
> >   /* SPDX-License-Identifier: GPL-2.0
> >    *
> >    * ARM CoreSight Architecture PMU driver.
> > - * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights
> reserved.
> > + * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights
> reserved.
> >    *
> >    */
> >
> > @@ -68,7 +68,10 @@
> >
> >   /* PMIIDR register field */
> >   #define ARM_CSPMU_PMIIDR_IMPLEMENTER        GENMASK(11, 0)
> > +#define ARM_CSPMU_PMIIDR_REVISION    GENMASK(15, 12)
> > +#define ARM_CSPMU_PMIIDR_VARIANT     GENMASK(19, 16)
> >   #define ARM_CSPMU_PMIIDR_PRODUCTID  GENMASK(31, 20)
> > +#define ARM_CSPMU_PMIIDR_PVR         GENMASK(31, 12)
> >
> >   struct arm_cspmu;
> >
> > @@ -107,15 +110,36 @@ struct arm_cspmu_impl_ops {
> >                                        struct attribute *attr, int unused);
> >   };
> >
> > +/* Vendor/implementer registration parameter. */
> > +struct arm_cspmu_impl_param {
> > +     /* JEDEC assigned implementer id of the vendor. */
> > +     u32 impl_id;
> > +     /*
> > +      * The pvr value and mask describes the device ids covered by the
> > +      * vendor backend. pvr contains the pattern of acceptable product,
> > +      * variant, and revision bits from device's PMIIDR. pvr_mask contains
> > +      * the relevant bits when comparing pvr. 0 value on the mask means any
> > +      * pvr value is supported.
> > +      */
> > +     u32 pvr;
> > +     u32 pvr_mask;
> > +     /* Backend module. */
> > +     struct module *module;
> > +     /* Callback to vendor backend to init arm_cspmu_impl::ops. */
> > +     int (*impl_init_ops)(struct arm_cspmu *cspmu);
> > +};
> > +
> >   /* Vendor/implementer descriptor. */
> >   struct arm_cspmu_impl {
> >       u32 pmiidr;
> >       struct arm_cspmu_impl_ops ops;
> > +     struct module *module;
> >       void *ctx;
> >   };
> >
> >   /* Coresight PMU descriptor. */
> >   struct arm_cspmu {
> > +     struct list_head next;
> >       struct pmu pmu;
> >       struct device *dev;
> >       struct acpi_apmt_node *apmt_node;
> > @@ -148,4 +172,10 @@ ssize_t arm_cspmu_sysfs_format_show(struct
> device *dev,
> >                                   struct device_attribute *attr,
> >                                   char *buf);
> >
> > +/* Register vendor backend. */
> > +int arm_cspmu_impl_register(const struct arm_cspmu_impl_param
> *impl_param);
> > +
> > +/* Unregister vendor backend. */
> > +void arm_cspmu_impl_unregister(const struct arm_cspmu_impl_param
> *impl_param);
> > +
> >   #endif /* __ARM_CSPMU_H__ */
> > diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c
> b/drivers/perf/arm_cspmu/nvidia_cspmu.c
> > index 72ef80caa3c8..c179849ca893 100644
> > --- a/drivers/perf/arm_cspmu/nvidia_cspmu.c
> > +++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c
> > @@ -1,14 +1,18 @@
> >   // SPDX-License-Identifier: GPL-2.0
> >   /*
> > - * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights
> reserved.
> > + * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights
> reserved.
> >    *
> >    */
> >
> >   /* Support for NVIDIA specific attributes. */
> >
> > +#include <linux/module.h>
> >   #include <linux/topology.h>
> >
> > -#include "nvidia_cspmu.h"
> > +#include "arm_cspmu.h"
> > +
> > +/* JEDEC-assigned JEP106 identification code */
> > +#define ARM_CSPMU_IMPL_ID_NVIDIA     0x36B
> >
> >   #define NV_PCIE_PORT_COUNT           10ULL
> >   #define NV_PCIE_FILTER_ID_MASK
> GENMASK_ULL(NV_PCIE_PORT_COUNT - 1, 0)
> > @@ -351,7 +355,7 @@ static char *nv_cspmu_format_name(const struct
> arm_cspmu *cspmu,
> >       return name;
> >   }
> >
> > -int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
> > +static int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
> >   {
> >       u32 prodid;
> >       struct nv_cspmu_ctx *ctx;
> > @@ -395,6 +399,33 @@ int nv_cspmu_init_ops(struct arm_cspmu
> *cspmu)
> >
> >       return 0;
> >   }
> > -EXPORT_SYMBOL_GPL(nv_cspmu_init_ops);
> > +
> > +/* Match all NVIDIA Coresight PMU devices */
> > +static const struct arm_cspmu_impl_param nv_cspmu_param = {
> > +     .module         = THIS_MODULE,
> > +     .impl_id        = ARM_CSPMU_IMPL_ID_NVIDIA,
> > +     .pvr            = 0,
> > +     .pvr_mask       = 0,
> > +     .impl_init_ops  = nv_cspmu_init_ops
> > +};
> > +
> > +static int __init nvidia_cspmu_init(void)
> > +{
> > +     int ret;
> > +
> > +     ret = arm_cspmu_impl_register(&nv_cspmu_param);
> > +     if (ret)
> > +             pr_err("nvidia_cspmu backend registration error: %d\n", ret);
> > +
> > +     return ret;
> > +}
> > +
> > +static void __exit nvidia_cspmu_exit(void)
> > +{
> > +     arm_cspmu_impl_unregister(&nv_cspmu_param);
> > +}
> > +
> > +module_init(nvidia_cspmu_init);
> > +module_exit(nvidia_cspmu_exit);
> >
> >   MODULE_LICENSE("GPL v2");
> > diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.h
> b/drivers/perf/arm_cspmu/nvidia_cspmu.h
> > deleted file mode 100644
> > index 71e18f0dc50b..000000000000
> > --- a/drivers/perf/arm_cspmu/nvidia_cspmu.h
> > +++ /dev/null
> > @@ -1,17 +0,0 @@
> > -/* SPDX-License-Identifier: GPL-2.0
> > - *
> > - * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights
> reserved.
> > - *
> > - */
> > -
> > -/* Support for NVIDIA specific attributes. */
> > -
> > -#ifndef __NVIDIA_CSPMU_H__
> > -#define __NVIDIA_CSPMU_H__
> > -
> > -#include "arm_cspmu.h"
> > -
> > -/* Allocate NVIDIA descriptor. */
> > -int nv_cspmu_init_ops(struct arm_cspmu *cspmu);
> > -
> > -#endif /* __NVIDIA_CSPMU_H__ */
> >
> > base-commit: 73f2c2a7e1d2b31fdd5faa6dfa151c437a6c0a5a
> > prerequisite-patch-id: fb691dc01d87597bcbaa4d352073304287c20f73

Robin Murphy May 2, 2023, 12:38 p.m. UTC | #8

On 2023-04-28 23:23, Besar Wicaksono wrote:
> Hi Robin and Suzuki,
> 
>> -----Original Message-----
>> From: Robin Murphy <robin.murphy@arm.com>
>> Sent: Thursday, April 27, 2023 7:21 AM
>> To: Besar Wicaksono <bwicaksono@nvidia.com>; suzuki.poulose@arm.com;
>> catalin.marinas@arm.com; will@kernel.org; mark.rutland@arm.com
>> Cc: linux-arm-kernel@lists.infradead.org; linux-kernel@vger.kernel.org; linux-
>> tegra@vger.kernel.org; Thierry Reding <treding@nvidia.com>; Jonathan
>> Hunter <jonathanh@nvidia.com>; Vikram Sethi <vsethi@nvidia.com>; Richard
>> Wiley <rwiley@nvidia.com>; Eric Funsten <efunsten@nvidia.com>
>> Subject: Re: [PATCH v2] perf: arm_cspmu: Separate Arm and vendor module
>>
>> External email: Use caution opening links or attachments
>>
>>
>> On 2023-04-18 07:20, Besar Wicaksono wrote:
>>> Arm Coresight PMU driver consists of main standard code and vendor
>>> backend code. Both are currently built as a single module.
>>> This patch adds vendor registration API to separate the two to
>>> keep things modular. Vendor module shall register to the main
>>> module on loading and trigger device reprobe.
>>
>> I think it might be considerably cleaner and safer if the main driver
>> retained at least some knowledge of the PMIIDR matches and used those to
>> explicity request the relevant module. Otherwise, not only is there an
>> awful lot of fiddly complexity here, but there's also quite a burden on
>> the user to know which modules they have to load to get full
>> functionality on any given system.
> 
> Do you mean like keep the existing match table as a whitelist, and associate
> each entry with the backend module name to load it from the main driver ?

It would essentially be a table that matches a PMIIDR filter to a module 
name. Having looked for existing examples of this kind of usage model, 
in terms of the overall shape it might look closest to a very 
stripped-down version of the crypto manager.

>> FYI I've just started working on adding devicetree support, and I do
>> need the generic architectural functionality to keep working in the
>> absence of any imp-def backend.
> 
> W.r.t the reprobe discussion with Suzuki, this would mean the expected
> behavior is to attach the device to standard imp as fallback/default.
> Suzuki, my preference is not supporting delayed reprobe on event->destroy
> due to the potential access to stale data. We should just fail the backend
> registration if one of the device is in use.

We shouldn't need to worry about that at all. If requesting an expected 
implementation module fails and those PMUs end up falling back to 
baseline functionality, there's no need to actively deny the backend if 
the module is manually loaded later, it merely won't be used. As far as 
reprobing goes, it seems reasonable for the user to remove/reload the 
main module after fixing the backend module availability if it matters 
to them. Or maybe we could just EPROBE_DEFER instead of falling back to 
baseline if we know the module is enabled and *should* be available; I 
don't have a particular preference either way. The main thing is just to 
have the backend modules work in a simple, intuitive, and mostly 
automatic manner, without all the complexity of effectively 
reimplementing a whole custom driver model in between perf and the real 
driver model.

I think the only really notable thing about this approach is that it 
would probably need to make sure the PMU probe runs asynchronously from 
the main module_init.

Thanks,
Robin.

diff mbox series

Patch

diff --git a/drivers/perf/arm_cspmu/Kconfig b/drivers/perf/arm_cspmu/Kconfig
index 0b316fe69a45..8ce7b45a0075 100644
--- a/drivers/perf/arm_cspmu/Kconfig
+++ b/drivers/perf/arm_cspmu/Kconfig
@@ -1,6 +1,6 @@ 
 # SPDX-License-Identifier: GPL-2.0
 #
-# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 
 config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
 	tristate "ARM Coresight Architecture PMU"
@@ -11,3 +11,10 @@  config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
 	  based on ARM CoreSight PMU architecture. Note that this PMU
 	  architecture does not have relationship with the ARM CoreSight
 	  Self-Hosted Tracing.
+
+config NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU
+	tristate "NVIDIA Coresight Architecture PMU"
+	depends on ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
+	help
+	  Provides NVIDIA specific attributes for performance monitoring unit
+	  (PMU) devices based on ARM CoreSight PMU architecture.
diff --git a/drivers/perf/arm_cspmu/Makefile b/drivers/perf/arm_cspmu/Makefile
index fedb17df982d..f8ae22411d59 100644
--- a/drivers/perf/arm_cspmu/Makefile
+++ b/drivers/perf/arm_cspmu/Makefile
@@ -1,6 +1,6 @@ 
-# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # SPDX-License-Identifier: GPL-2.0
 
-obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu_module.o
-arm_cspmu_module-y := arm_cspmu.o nvidia_cspmu.o
+obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu.o
+obj-$(CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += nvidia_cspmu.o
diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c
index e31302ab7e37..c55ea2b74454 100644
--- a/drivers/perf/arm_cspmu/arm_cspmu.c
+++ b/drivers/perf/arm_cspmu/arm_cspmu.c
@@ -16,7 +16,7 @@ 
  * The user should refer to the vendor technical documentation to get details
  * about the supported events.
  *
- * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  *
  */
 
@@ -25,13 +25,14 @@ 
 #include <linux/ctype.h>
 #include <linux/interrupt.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/list.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>
 #include <acpi/processor.h>
 
 #include "arm_cspmu.h"
-#include "nvidia_cspmu.h"
 
 #define PMUNAME "arm_cspmu"
 #define DRVNAME "arm-cs-arch-pmu"
@@ -117,11 +118,52 @@ 
  */
 #define HILOHI_MAX_POLL	1000
 
-/* JEDEC-assigned JEP106 identification code */
-#define ARM_CSPMU_IMPL_ID_NVIDIA		0x36B
-
 static unsigned long arm_cspmu_cpuhp_state;
 
+/* List of Coresight PMU instances in the system. */
+static LIST_HEAD(arm_cspmus);
+
+/* List of registered vendor backends. */
+static LIST_HEAD(arm_cspmu_impls);
+
+static DEFINE_MUTEX(arm_cspmu_lock);
+
+/*
+ * State of the generic driver.
+ * 0 => registering backend.
+ * 1 => ready to use.
+ * 2 or more => in use.
+ */
+#define ARM_CSPMU_STATE_REG	0
+#define ARM_CSPMU_STATE_READY	1
+static atomic_t arm_cspmu_state;
+
+static void arm_cspmu_state_ready(void)
+{
+	atomic_set(&arm_cspmu_state, ARM_CSPMU_STATE_READY);
+}
+
+static bool try_arm_cspmu_state_reg(void)
+{
+	const int old = ARM_CSPMU_STATE_READY;
+	const int new = ARM_CSPMU_STATE_REG;
+
+	return atomic_cmpxchg(&arm_cspmu_state, old, new) == old;
+}
+
+static bool try_arm_cspmu_state_get(void)
+{
+	return atomic_inc_not_zero(&arm_cspmu_state);
+}
+
+static void arm_cspmu_state_put(void)
+{
+	int ret;
+
+	ret = atomic_dec_if_positive(&arm_cspmu_state);
+	WARN_ON(ret < 0);
+}
+
 /*
  * In CoreSight PMU architecture, all of the MMIO registers are 32-bit except
  * counter register. The counter register can be implemented as 32-bit or 64-bit
@@ -380,26 +422,161 @@  static struct attribute_group arm_cspmu_cpumask_attr_group = {
 };
 
 struct impl_match {
-	u32 pmiidr;
-	u32 mask;
-	int (*impl_init_ops)(struct arm_cspmu *cspmu);
+	struct list_head next;
+	struct arm_cspmu_impl_param param;
 };
 
-static const struct impl_match impl_match[] = {
-	{
-	  .pmiidr = ARM_CSPMU_IMPL_ID_NVIDIA,
-	  .mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
-	  .impl_init_ops = nv_cspmu_init_ops
-	},
-	{}
-};
+static struct arm_cspmu_impl_param to_impl_param(const struct arm_cspmu *cspmu)
+{
+	struct arm_cspmu_impl_param ret = {0};
+	u32 pmiidr = cspmu->impl.pmiidr;
+
+	ret.impl_id = FIELD_GET(ARM_CSPMU_PMIIDR_IMPLEMENTER, pmiidr);
+	ret.pvr = FIELD_GET(ARM_CSPMU_PMIIDR_PVR, pmiidr);
+	ret.pvr_mask = GENMASK(31, 0);
+
+	return ret;
+}
+
+static bool impl_param_match(const struct arm_cspmu_impl_param *A,
+			     const struct arm_cspmu_impl_param *B)
+{
+	/*
+	 * Match criteria:
+	 * - Implementer id should match.
+	 * - A's device id is within B's range, or vice versa. This allows
+	 *   vendor to register backend for a range of devices.
+	 */
+	if ((A->impl_id == B->impl_id) &&
+	    (((A->pvr & A->pvr_mask) == (B->pvr & A->pvr_mask)) ||
+	     ((A->pvr & B->pvr_mask) == (B->pvr & B->pvr_mask))))
+		return true;
+
+	return false;
+}
+
+static struct impl_match *impl_match_find(
+	const struct arm_cspmu_impl_param *impl_param)
+{
+	struct impl_match *impl_match;
+
+	list_for_each_entry(impl_match, &arm_cspmu_impls, next) {
+		if (impl_param_match(impl_param, &impl_match->param))
+			return impl_match;
+	}
+
+	return NULL;
+}
+
+static int arm_cspmu_impl_reprobe(
+	const struct arm_cspmu_impl_param *impl_param)
+{
+	struct arm_cspmu *cspmu, *temp;
+	LIST_HEAD(reprobe_list);
+	int ret = 0;
+
+	mutex_lock(&arm_cspmu_lock);
+
+	/* Move the matching devices to temp list to avoid recursive lock. */
+	list_for_each_entry_safe(cspmu, temp, &arm_cspmus, next) {
+		struct arm_cspmu_impl_param match_param = to_impl_param(cspmu);
+
+		if (impl_param_match(impl_param, &match_param))
+			list_move(&cspmu->next, &reprobe_list);
+	}
+
+	mutex_unlock(&arm_cspmu_lock);
+
+	/* Reprobe the devices. */
+	list_for_each_entry_safe(cspmu, temp, &reprobe_list, next) {
+		ret = device_reprobe(cspmu->dev);
+		if (ret) {
+			pr_err("arm_cspmu fail reprobe err: %d\n", ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+int arm_cspmu_impl_register(const struct arm_cspmu_impl_param *impl_param)
+{
+	struct impl_match *match;
+	int ret = 0;
+
+	if (!try_arm_cspmu_state_reg()) {
+		pr_err("arm_cspmu reg failed, device(s) is in use\n");
+		return -EBUSY;
+	}
+
+	mutex_lock(&arm_cspmu_lock);
+
+	match = impl_match_find(impl_param);
+	if (match) {
+		pr_err("arm_cspmu reg failed, impl: 0x%x, pvr: 0x%x, pvr_mask: 0x%x already exists\n",
+			match->param.impl_id, match->param.pvr,
+			match->param.pvr_mask);
+		mutex_unlock(&arm_cspmu_lock);
+		arm_cspmu_state_ready();
+		return -EINVAL;
+	}
+
+	match = kzalloc(sizeof(struct impl_match), GFP_KERNEL);
+	if (!match) {
+		mutex_unlock(&arm_cspmu_lock);
+		arm_cspmu_state_ready();
+		return -ENOMEM;
+	}
+
+	memcpy(&match->param, impl_param, sizeof(match->param));
+	list_add(&match->next, &arm_cspmu_impls);
+
+	mutex_unlock(&arm_cspmu_lock);
+
+	/* Replace generic backend with vendor implementation. */
+	ret = arm_cspmu_impl_reprobe(impl_param);
+
+	if (ret)
+		arm_cspmu_impl_unregister(impl_param);
+
+	arm_cspmu_state_ready();
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(arm_cspmu_impl_register);
+
+void arm_cspmu_impl_unregister(const struct arm_cspmu_impl_param *impl_param)
+{
+	struct impl_match *match;
+
+	mutex_lock(&arm_cspmu_lock);
+
+	match = impl_match_find(impl_param);
+	if (!match) {
+		pr_err("arm_cspmu unreg failed, unable to find impl: 0x%x, pvr: 0x%x, pvr_mask: 0x%x\n",
+			impl_param->impl_id, impl_param->pvr,
+			impl_param->pvr_mask);
+		mutex_unlock(&arm_cspmu_lock);
+		return;
+	}
+
+	list_del(&match->next);
+	kfree(match);
+
+	mutex_unlock(&arm_cspmu_lock);
+
+	/* Re-attach devices to standard driver. */
+	arm_cspmu_impl_reprobe(impl_param);
+}
+EXPORT_SYMBOL_GPL(arm_cspmu_impl_unregister);
 
 static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
 {
-	int ret;
+	int ret = 0;
 	struct acpi_apmt_node *apmt_node = cspmu->apmt_node;
 	struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
-	const struct impl_match *match = impl_match;
+	struct arm_cspmu_impl_param match_param = {0};
+	const struct impl_match *match;
 
 	/*
 	 * Get PMU implementer and product id from APMT node.
@@ -410,19 +587,23 @@  static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
 		(apmt_node->impl_id) ? apmt_node->impl_id :
 				       readl(cspmu->base0 + PMIIDR);
 
-	/* Find implementer specific attribute ops. */
-	for (; match->pmiidr; match++) {
-		const u32 mask = match->mask;
+	cspmu->impl.module = THIS_MODULE;
 
-		if ((match->pmiidr & mask) == (cspmu->impl.pmiidr & mask)) {
-			ret = match->impl_init_ops(cspmu);
-			if (ret)
-				return ret;
+	mutex_lock(&arm_cspmu_lock);
 
-			break;
-		}
+	/* Find implementer specific attribute ops. */
+	match_param = to_impl_param(cspmu);
+	match = impl_match_find(&match_param);
+	if (match) {
+		cspmu->impl.module = match->param.module;
+		ret = match->param.impl_init_ops(cspmu);
 	}
 
+	mutex_unlock(&arm_cspmu_lock);
+
+	if (ret)
+		return ret;
+
 	/* Use default callbacks if implementer doesn't provide one. */
 	CHECK_DEFAULT_IMPL_OPS(impl_ops, get_event_attrs);
 	CHECK_DEFAULT_IMPL_OPS(impl_ops, get_format_attrs);
@@ -639,6 +820,11 @@  static int arm_cspmu_event_init(struct perf_event *event)
 	struct arm_cspmu *cspmu;
 	struct hw_perf_event *hwc = &event->hw;
 
+	if (!try_arm_cspmu_state_get()) {
+		pr_err("arm_cspmu event_init fail: driver is reprobing\n");
+		return -EBUSY;
+	}
+
 	cspmu = to_arm_cspmu(event->pmu);
 
 	/*
@@ -648,12 +834,14 @@  static int arm_cspmu_event_init(struct perf_event *event)
 	if (is_sampling_event(event)) {
 		dev_dbg(cspmu->pmu.dev,
 			"Can't support sampling events\n");
+		arm_cspmu_state_put();
 		return -EOPNOTSUPP;
 	}
 
 	if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK) {
 		dev_dbg(cspmu->pmu.dev,
 			"Can't support per-task counters\n");
+		arm_cspmu_state_put();
 		return -EINVAL;
 	}
 
@@ -664,16 +852,21 @@  static int arm_cspmu_event_init(struct perf_event *event)
 	if (!cpumask_test_cpu(event->cpu, &cspmu->associated_cpus)) {
 		dev_dbg(cspmu->pmu.dev,
 			"Requested cpu is not associated with the PMU\n");
+		arm_cspmu_state_put();
 		return -EINVAL;
 	}
 
 	/* Enforce the current active CPU to handle the events in this PMU. */
 	event->cpu = cpumask_first(&cspmu->active_cpu);
-	if (event->cpu >= nr_cpu_ids)
+	if (event->cpu >= nr_cpu_ids) {
+		arm_cspmu_state_put();
 		return -EINVAL;
+	}
 
-	if (!arm_cspmu_validate_group(event))
+	if (!arm_cspmu_validate_group(event)) {
+		arm_cspmu_state_put();
 		return -EINVAL;
+	}
 
 	/*
 	 * The logical counter id is tracked with hw_perf_event.extra_reg.idx.
@@ -686,6 +879,8 @@  static int arm_cspmu_event_init(struct perf_event *event)
 	hwc->extra_reg.idx = -1;
 	hwc->config = cspmu->impl.ops.event_type(event);
 
+	arm_cspmu_state_put();
+
 	return 0;
 }
 
@@ -864,13 +1059,22 @@  static int arm_cspmu_add(struct perf_event *event, int flags)
 	struct hw_perf_event *hwc = &event->hw;
 	int idx;
 
+	if (!try_arm_cspmu_state_get()) {
+		pr_err("arm_cspmu event_init fail: driver is reprobing\n");
+		return -EBUSY;
+	}
+
 	if (WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(),
-					   &cspmu->associated_cpus)))
+					   &cspmu->associated_cpus))) {
+		arm_cspmu_state_put();
 		return -ENOENT;
+	}
 
 	idx = arm_cspmu_get_event_idx(hw_events, event);
-	if (idx < 0)
+	if (idx < 0) {
+		arm_cspmu_state_put();
 		return idx;
+	}
 
 	hw_events->events[idx] = event;
 	hwc->idx = to_phys_idx(cspmu, idx);
@@ -900,6 +1104,8 @@  static void arm_cspmu_del(struct perf_event *event, int flags)
 	clear_bit(idx, hw_events->used_ctrs);
 
 	perf_event_update_userpage(event);
+
+	arm_cspmu_state_put();
 }
 
 static void arm_cspmu_read(struct perf_event *event)
@@ -1154,7 +1360,7 @@  static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu)
 
 	cspmu->pmu = (struct pmu){
 		.task_ctx_nr	= perf_invalid_context,
-		.module		= THIS_MODULE,
+		.module		= cspmu->impl.module,
 		.pmu_enable	= arm_cspmu_enable,
 		.pmu_disable	= arm_cspmu_disable,
 		.event_init	= arm_cspmu_event_init,
@@ -1205,6 +1411,10 @@  static int arm_cspmu_device_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
+	mutex_lock(&arm_cspmu_lock);
+	list_add(&cspmu->next, &arm_cspmus);
+	mutex_unlock(&arm_cspmu_lock);
+
 	return 0;
 }
 
@@ -1212,6 +1422,10 @@  static int arm_cspmu_device_remove(struct platform_device *pdev)
 {
 	struct arm_cspmu *cspmu = platform_get_drvdata(pdev);
 
+	mutex_lock(&arm_cspmu_lock);
+	list_del(&cspmu->next);
+	mutex_unlock(&arm_cspmu_lock);
+
 	perf_pmu_unregister(&cspmu->pmu);
 	cpuhp_state_remove_instance(arm_cspmu_cpuhp_state, &cspmu->cpuhp_node);
 
@@ -1281,6 +1495,8 @@  static int __init arm_cspmu_init(void)
 {
 	int ret;
 
+	arm_cspmu_state_ready();
+
 	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
 					"perf/arm/cspmu:online",
 					arm_cspmu_cpu_online,
diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h
index 51323b175a4a..cf3458d9fc63 100644
--- a/drivers/perf/arm_cspmu/arm_cspmu.h
+++ b/drivers/perf/arm_cspmu/arm_cspmu.h
@@ -1,7 +1,7 @@ 
 /* SPDX-License-Identifier: GPL-2.0
  *
  * ARM CoreSight Architecture PMU driver.
- * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  *
  */
 
@@ -68,7 +68,10 @@ 
 
 /* PMIIDR register field */
 #define ARM_CSPMU_PMIIDR_IMPLEMENTER	GENMASK(11, 0)
+#define ARM_CSPMU_PMIIDR_REVISION	GENMASK(15, 12)
+#define ARM_CSPMU_PMIIDR_VARIANT	GENMASK(19, 16)
 #define ARM_CSPMU_PMIIDR_PRODUCTID	GENMASK(31, 20)
+#define ARM_CSPMU_PMIIDR_PVR		GENMASK(31, 12)
 
 struct arm_cspmu;
 
@@ -107,15 +110,36 @@  struct arm_cspmu_impl_ops {
 					 struct attribute *attr, int unused);
 };
 
+/* Vendor/implementer registration parameter. */
+struct arm_cspmu_impl_param {
+	/* JEDEC assigned implementer id of the vendor. */
+	u32 impl_id;
+	/*
+	 * The pvr value and mask describes the device ids covered by the
+	 * vendor backend. pvr contains the pattern of acceptable product,
+	 * variant, and revision bits from device's PMIIDR. pvr_mask contains
+	 * the relevant bits when comparing pvr. 0 value on the mask means any
+	 * pvr value is supported.
+	 */
+	u32 pvr;
+	u32 pvr_mask;
+	/* Backend module. */
+	struct module *module;
+	/* Callback to vendor backend to init arm_cspmu_impl::ops. */
+	int (*impl_init_ops)(struct arm_cspmu *cspmu);
+};
+
 /* Vendor/implementer descriptor. */
 struct arm_cspmu_impl {
 	u32 pmiidr;
 	struct arm_cspmu_impl_ops ops;
+	struct module *module;
 	void *ctx;
 };
 
 /* Coresight PMU descriptor. */
 struct arm_cspmu {
+	struct list_head next;
 	struct pmu pmu;
 	struct device *dev;
 	struct acpi_apmt_node *apmt_node;
@@ -148,4 +172,10 @@  ssize_t arm_cspmu_sysfs_format_show(struct device *dev,
 				    struct device_attribute *attr,
 				    char *buf);
 
+/* Register vendor backend. */
+int arm_cspmu_impl_register(const struct arm_cspmu_impl_param *impl_param);
+
+/* Unregister vendor backend. */
+void arm_cspmu_impl_unregister(const struct arm_cspmu_impl_param *impl_param);
+
 #endif /* __ARM_CSPMU_H__ */
diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c
index 72ef80caa3c8..c179849ca893 100644
--- a/drivers/perf/arm_cspmu/nvidia_cspmu.c
+++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c
@@ -1,14 +1,18 @@ 
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  *
  */
 
 /* Support for NVIDIA specific attributes. */
 
+#include <linux/module.h>
 #include <linux/topology.h>
 
-#include "nvidia_cspmu.h"
+#include "arm_cspmu.h"
+
+/* JEDEC-assigned JEP106 identification code */
+#define ARM_CSPMU_IMPL_ID_NVIDIA     0x36B
 
 #define NV_PCIE_PORT_COUNT           10ULL
 #define NV_PCIE_FILTER_ID_MASK       GENMASK_ULL(NV_PCIE_PORT_COUNT - 1, 0)
@@ -351,7 +355,7 @@  static char *nv_cspmu_format_name(const struct arm_cspmu *cspmu,
 	return name;
 }
 
-int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
+static int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
 {
 	u32 prodid;
 	struct nv_cspmu_ctx *ctx;
@@ -395,6 +399,33 @@  int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(nv_cspmu_init_ops);
+
+/* Match all NVIDIA Coresight PMU devices */
+static const struct arm_cspmu_impl_param nv_cspmu_param = {
+	.module		= THIS_MODULE,
+	.impl_id	= ARM_CSPMU_IMPL_ID_NVIDIA,
+	.pvr		= 0,
+	.pvr_mask	= 0,
+	.impl_init_ops	= nv_cspmu_init_ops
+};
+
+static int __init nvidia_cspmu_init(void)
+{
+	int ret;
+
+	ret = arm_cspmu_impl_register(&nv_cspmu_param);
+	if (ret)
+		pr_err("nvidia_cspmu backend registration error: %d\n", ret);
+
+	return ret;
+}
+
+static void __exit nvidia_cspmu_exit(void)
+{
+	arm_cspmu_impl_unregister(&nv_cspmu_param);
+}
+
+module_init(nvidia_cspmu_init);
+module_exit(nvidia_cspmu_exit);
 
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.h b/drivers/perf/arm_cspmu/nvidia_cspmu.h
deleted file mode 100644
index 71e18f0dc50b..000000000000
--- a/drivers/perf/arm_cspmu/nvidia_cspmu.h
+++ /dev/null
@@ -1,17 +0,0 @@ 
-/* SPDX-License-Identifier: GPL-2.0
- *
- * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- *
- */
-
-/* Support for NVIDIA specific attributes. */
-
-#ifndef __NVIDIA_CSPMU_H__
-#define __NVIDIA_CSPMU_H__
-
-#include "arm_cspmu.h"
-
-/* Allocate NVIDIA descriptor. */
-int nv_cspmu_init_ops(struct arm_cspmu *cspmu);
-
-#endif /* __NVIDIA_CSPMU_H__ */