[v4,3/4] drivers/perf: add DesignWare PCIe PMU driver

Message ID 20230516130110.59632-4-xueshuai@linux.alibaba.com
State New
Headers
Series None |

Commit Message

Shuai Xue May 16, 2023, 1:01 p.m. UTC
  This commit adds the PCIe Performance Monitoring Unit (PMU) driver support
for T-Head Yitian SoC chip. Yitian is based on the Synopsys PCI Express
Core controller IP which provides statistics feature. The PMU is not a PCIe
Root Complex integrated End Point(RCiEP) device but only register counters
provided by each PCIe Root Port.

To facilitate collection of statistics the controller provides the
following two features for each Root Port:

- Time Based Analysis (RX/TX data throughput and time spent in each
  low-power LTSSM state)
- Event counters (Error and Non-Error for lanes)

Note, only one counter for each type and does not overflow interrupt.

This driver adds PMU devices for each PCIe Root Port. And the PMU device is
named based the BDF of Root Port. For example,

    30:03.0 PCI bridge: Device 1ded:8000 (rev 01)

the PMU device name for this Root Port is dwc_rootport_3018.

Example usage of counting PCIe RX TLP data payload (Units of 16 bytes)::

    $# perf stat -a -e dwc_rootport_3018/Rx_PCIe_TLP_Data_Payload/

average RX bandwidth can be calculated like this:

    PCIe TX Bandwidth = PCIE_TX_DATA * 16B / Measure_Time_Window

Signed-off-by: Shuai Xue <xueshuai@linux.alibaba.com>
---
 drivers/perf/Kconfig        |   7 +
 drivers/perf/Makefile       |   1 +
 drivers/perf/dwc_pcie_pmu.c | 700 ++++++++++++++++++++++++++++++++++++
 3 files changed, 708 insertions(+)
 create mode 100644 drivers/perf/dwc_pcie_pmu.c
  

Comments

Bjorn Helgaas May 16, 2023, 7:19 p.m. UTC | #1
On Tue, May 16, 2023 at 09:01:09PM +0800, Shuai Xue wrote:
> ...

> +#include <linux/pci.h>
> +#include <linux/bitfield.h>
> +#include <linux/bitops.h>
> +#include <linux/cpuhotplug.h>
> +#include <linux/cpumask.h>
> +#include <linux/device.h>
> +#include <linux/errno.h>
> +#include <linux/kernel.h>
> +#include <linux/list.h>
> +#include <linux/perf_event.h>
> +#include <linux/platform_device.h>
> +#include <linux/smp.h>
> +#include <linux/sysfs.h>
> +#include <linux/types.h>

Typically in alpha order.

> +#define DWC_PCIE_VSEC_RAS_DES_ID		0x02
> +
> +#define DWC_PCIE_EVENT_CNT_CTL			0x8

Add a blank line here.

> +/*
> + * Event Counter Data Select includes two parts:

> +#define DWC_PCIE_EVENT_CNT_DATA			0xC
> +#define DWC_PCIE_DURATION_4US			0xff
...
Pick upper-case hex or lower-case hex and use consistently.

> +#define DWC_PCIE_LANE_EVENT_MAX_PERIOD		(GENMASK_ULL(31, 0))
> +#define DWC_PCIE_TIME_BASED_EVENT_MAX_PERIOD	(GENMASK_ULL(63, 0))

Unnecessary outer "()".

> +struct dwc_pcie_pmu {
> +	struct pci_dev		*pdev;		/* Root Port device */
> +	u32			ras_des;	/* RAS DES capability offset */

u16 is enough to address all of config space.
  
kernel test robot May 16, 2023, 11:21 p.m. UTC | #2
Hi Shuai,

kernel test robot noticed the following build errors:

[auto build test ERROR on pci/next]
[also build test ERROR on pci/for-linus soc/for-next linus/master v6.4-rc2 next-20230516]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Shuai-Xue/PCI-move-Alibaba-Vendor-ID-linux-pci_ids-h/20230517-013326
base:   https://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git next
patch link:    https://lore.kernel.org/r/20230516130110.59632-4-xueshuai%40linux.alibaba.com
patch subject: [PATCH v4 3/4] drivers/perf: add DesignWare PCIe PMU driver
config: sh-allmodconfig
compiler: sh4-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/f576345a26fff4584ed49f0f42e03c65d8a7f2bf
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Shuai-Xue/PCI-move-Alibaba-Vendor-ID-linux-pci_ids-h/20230517-013326
        git checkout f576345a26fff4584ed49f0f42e03c65d8a7f2bf
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=sh olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=sh SHELL=/bin/bash drivers/perf/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202305170639.XU3djFZX-lkp@intel.com/

All errors (new ones prefixed by >>):

   drivers/perf/dwc_pcie_pmu.c: In function '__dwc_pcie_pmu_probe':
>> drivers/perf/dwc_pcie_pmu.c:507:24: error: implicit declaration of function 'pci_find_vsec_capability'; did you mean 'pci_find_ext_capability'? [-Werror=implicit-function-declaration]
     507 |                 vsec = pci_find_vsec_capability(pdev, PCI_VENDOR_ID_ALIBABA,
         |                        ^~~~~~~~~~~~~~~~~~~~~~~~
         |                        pci_find_ext_capability
   cc1: some warnings being treated as errors


vim +507 drivers/perf/dwc_pcie_pmu.c

   487	
   488	static int __dwc_pcie_pmu_probe(struct dwc_pcie_pmu_priv *priv)
   489	{
   490		struct pci_dev *pdev = NULL;
   491		struct dwc_pcie_pmu *pcie_pmu;
   492		char *name;
   493		u32 bdf;
   494		int ret;
   495	
   496		INIT_LIST_HEAD(&priv->pmu_nodes);
   497	
   498		/* Match the rootport with VSEC_RAS_DES_ID, and register a PMU for it */
   499		for_each_pci_dev(pdev) {
   500			u16 vsec;
   501			u32 val;
   502	
   503			if (!(pci_is_pcie(pdev) &&
   504			      pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT))
   505				continue;
   506	
 > 507			vsec = pci_find_vsec_capability(pdev, PCI_VENDOR_ID_ALIBABA,
   508							DWC_PCIE_VSEC_RAS_DES_ID);
   509			if (!vsec)
   510				continue;
   511	
   512			pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val);
   513			if (PCI_VNDR_HEADER_REV(val) != 0x04 ||
   514			    PCI_VNDR_HEADER_LEN(val) != 0x100)
   515				continue;
   516			pci_dbg(pdev,
   517				"Detected PCIe Vendor-Specific Extended Capability RAS DES\n");
   518	
   519			bdf = PCI_DEVID(pdev->bus->number, pdev->devfn);
   520			name = devm_kasprintf(priv->dev, GFP_KERNEL, "dwc_rootport_%x",
   521					      bdf);
   522			if (!name)
   523				return -ENOMEM;
   524	
   525			/* All checks passed, go go go */
   526			pcie_pmu = devm_kzalloc(&pdev->dev, sizeof(*pcie_pmu), GFP_KERNEL);
   527			if (!pcie_pmu) {
   528				pci_dev_put(pdev);
   529				return -ENOMEM;
   530			}
   531	
   532			pcie_pmu->pdev = pdev;
   533			pcie_pmu->ras_des = vsec;
   534			pcie_pmu->nr_lanes = pcie_get_width_cap(pdev);
   535			pcie_pmu->pmu = (struct pmu){
   536				.module		= THIS_MODULE,
   537				.attr_groups	= dwc_pcie_attr_groups,
   538				.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
   539				.task_ctx_nr	= perf_invalid_context,
   540				.event_init	= dwc_pcie_pmu_event_init,
   541				.add		= dwc_pcie_pmu_event_add,
   542				.del		= dwc_pcie_pmu_event_del,
   543				.start		= dwc_pcie_pmu_event_start,
   544				.stop		= dwc_pcie_pmu_event_stop,
   545				.read		= dwc_pcie_pmu_event_update,
   546			};
   547	
   548			/* Add this instance to the list used by the offline callback */
   549			ret = cpuhp_state_add_instance(dwc_pcie_pmu_hp_state,
   550						       &pcie_pmu->cpuhp_node);
   551			if (ret) {
   552				pci_err(pcie_pmu->pdev,
   553					"Error %d registering hotplug @%x\n", ret, bdf);
   554				return ret;
   555			}
   556			ret = perf_pmu_register(&pcie_pmu->pmu, name, -1);
   557			if (ret) {
   558				pci_err(pcie_pmu->pdev,
   559					"Error %d registering PMU @%x\n", ret, bdf);
   560				cpuhp_state_remove_instance_nocalls(
   561					dwc_pcie_pmu_hp_state, &pcie_pmu->cpuhp_node);
   562				return ret;
   563			}
   564	
   565			/* Add registered PMUs and unregister them when this driver remove */
   566			list_add(&pcie_pmu->pmu_node, &priv->pmu_nodes);
   567		}
   568	
   569		return 0;
   570	}
   571
  
Shuai Xue May 17, 2023, 2:35 a.m. UTC | #3
On 2023/5/17 03:19, Bjorn Helgaas wrote:
> On Tue, May 16, 2023 at 09:01:09PM +0800, Shuai Xue wrote:
>> ...
> 
>> +#include <linux/pci.h>
>> +#include <linux/bitfield.h>
>> +#include <linux/bitops.h>
>> +#include <linux/cpuhotplug.h>
>> +#include <linux/cpumask.h>
>> +#include <linux/device.h>
>> +#include <linux/errno.h>
>> +#include <linux/kernel.h>
>> +#include <linux/list.h>
>> +#include <linux/perf_event.h>
>> +#include <linux/platform_device.h>
>> +#include <linux/smp.h>
>> +#include <linux/sysfs.h>
>> +#include <linux/types.h>
> 
> Typically in alpha order.

Got it, I will reorder them.

> 
>> +#define DWC_PCIE_VSEC_RAS_DES_ID		0x02
>> +
>> +#define DWC_PCIE_EVENT_CNT_CTL			0x8
> 
> Add a blank line here.

Sure, will add it.

> 
>> +/*
>> + * Event Counter Data Select includes two parts:
> 
>> +#define DWC_PCIE_EVENT_CNT_DATA			0xC
>> +#define DWC_PCIE_DURATION_4US			0xff
> ...
> Pick upper-case hex or lower-case hex and use consistently.

Will pick upper-case hex for all macros.

> 
>> +#define DWC_PCIE_LANE_EVENT_MAX_PERIOD		(GENMASK_ULL(31, 0))
>> +#define DWC_PCIE_TIME_BASED_EVENT_MAX_PERIOD	(GENMASK_ULL(63, 0))
> 
> Unnecessary outer "()".

Ok, will remove it.

> 
>> +struct dwc_pcie_pmu {
>> +	struct pci_dev		*pdev;		/* Root Port device */
>> +	u32			ras_des;	/* RAS DES capability offset */
> 
> u16 is enough to address all of config space.

Go it. will fix in next version.

Thank you :)

Best Regards,
Shuai
  
Shuai Xue May 17, 2023, 3:37 a.m. UTC | #4
On 2023/5/17 07:21, kernel test robot wrote:
> Hi Shuai,
> 
> kernel test robot noticed the following build errors:
> 
> [auto build test ERROR on pci/next]
> [also build test ERROR on pci/for-linus soc/for-next linus/master v6.4-rc2 next-20230516]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch#_base_tree_information]
> 
> url:    https://github.com/intel-lab-lkp/linux/commits/Shuai-Xue/PCI-move-Alibaba-Vendor-ID-linux-pci_ids-h/20230517-013326
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git next
> patch link:    https://lore.kernel.org/r/20230516130110.59632-4-xueshuai%40linux.alibaba.com
> patch subject: [PATCH v4 3/4] drivers/perf: add DesignWare PCIe PMU driver
> config: sh-allmodconfig
> compiler: sh4-linux-gcc (GCC) 12.1.0
> reproduce (this is a W=1 build):
>         wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>         chmod +x ~/bin/make.cross
>         # https://github.com/intel-lab-lkp/linux/commit/f576345a26fff4584ed49f0f42e03c65d8a7f2bf
>         git remote add linux-review https://github.com/intel-lab-lkp/linux
>         git fetch --no-tags linux-review Shuai-Xue/PCI-move-Alibaba-Vendor-ID-linux-pci_ids-h/20230517-013326
>         git checkout f576345a26fff4584ed49f0f42e03c65d8a7f2bf
>         # save the config file
>         mkdir build_dir && cp config build_dir/.config
>         COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=sh olddefconfig
>         COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=sh SHELL=/bin/bash drivers/perf/
> 
> If you fix the issue, kindly add following tag where applicable
> | Reported-by: kernel test robot <lkp@intel.com>
> | Link: https://lore.kernel.org/oe-kbuild-all/202305170639.XU3djFZX-lkp@intel.com/
> 
> All errors (new ones prefixed by >>):
> 
>    drivers/perf/dwc_pcie_pmu.c: In function '__dwc_pcie_pmu_probe':
>>> drivers/perf/dwc_pcie_pmu.c:507:24: error: implicit declaration of function 'pci_find_vsec_capability'; did you mean 'pci_find_ext_capability'? [-Werror=implicit-function-declaration]
>      507 |                 vsec = pci_find_vsec_capability(pdev, PCI_VENDOR_ID_ALIBABA,
>          |                        ^~~~~~~~~~~~~~~~~~~~~~~~
>          |                        pci_find_ext_capability
>    cc1: some warnings being treated as errors

This will be fixed by:

- remove COMPILE_TEST and add depend on PCI in kconfig

Thank you.
Shuai

> 
> vim +507 drivers/perf/dwc_pcie_pmu.c
> 
>    487	
>    488	static int __dwc_pcie_pmu_probe(struct dwc_pcie_pmu_priv *priv)
>    489	{
>    490		struct pci_dev *pdev = NULL;
>    491		struct dwc_pcie_pmu *pcie_pmu;
>    492		char *name;
>    493		u32 bdf;
>    494		int ret;
>    495	
>    496		INIT_LIST_HEAD(&priv->pmu_nodes);
>    497	
>    498		/* Match the rootport with VSEC_RAS_DES_ID, and register a PMU for it */
>    499		for_each_pci_dev(pdev) {
>    500			u16 vsec;
>    501			u32 val;
>    502	
>    503			if (!(pci_is_pcie(pdev) &&
>    504			      pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT))
>    505				continue;
>    506	
>  > 507			vsec = pci_find_vsec_capability(pdev, PCI_VENDOR_ID_ALIBABA,
>    508							DWC_PCIE_VSEC_RAS_DES_ID);
>    509			if (!vsec)
>    510				continue;
>    511	
>    512			pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val);
>    513			if (PCI_VNDR_HEADER_REV(val) != 0x04 ||
>    514			    PCI_VNDR_HEADER_LEN(val) != 0x100)
>    515				continue;
>    516			pci_dbg(pdev,
>    517				"Detected PCIe Vendor-Specific Extended Capability RAS DES\n");
>    518	
>    519			bdf = PCI_DEVID(pdev->bus->number, pdev->devfn);
>    520			name = devm_kasprintf(priv->dev, GFP_KERNEL, "dwc_rootport_%x",
>    521					      bdf);
>    522			if (!name)
>    523				return -ENOMEM;
>    524	
>    525			/* All checks passed, go go go */
>    526			pcie_pmu = devm_kzalloc(&pdev->dev, sizeof(*pcie_pmu), GFP_KERNEL);
>    527			if (!pcie_pmu) {
>    528				pci_dev_put(pdev);
>    529				return -ENOMEM;
>    530			}
>    531	
>    532			pcie_pmu->pdev = pdev;
>    533			pcie_pmu->ras_des = vsec;
>    534			pcie_pmu->nr_lanes = pcie_get_width_cap(pdev);
>    535			pcie_pmu->pmu = (struct pmu){
>    536				.module		= THIS_MODULE,
>    537				.attr_groups	= dwc_pcie_attr_groups,
>    538				.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
>    539				.task_ctx_nr	= perf_invalid_context,
>    540				.event_init	= dwc_pcie_pmu_event_init,
>    541				.add		= dwc_pcie_pmu_event_add,
>    542				.del		= dwc_pcie_pmu_event_del,
>    543				.start		= dwc_pcie_pmu_event_start,
>    544				.stop		= dwc_pcie_pmu_event_stop,
>    545				.read		= dwc_pcie_pmu_event_update,
>    546			};
>    547	
>    548			/* Add this instance to the list used by the offline callback */
>    549			ret = cpuhp_state_add_instance(dwc_pcie_pmu_hp_state,
>    550						       &pcie_pmu->cpuhp_node);
>    551			if (ret) {
>    552				pci_err(pcie_pmu->pdev,
>    553					"Error %d registering hotplug @%x\n", ret, bdf);
>    554				return ret;
>    555			}
>    556			ret = perf_pmu_register(&pcie_pmu->pmu, name, -1);
>    557			if (ret) {
>    558				pci_err(pcie_pmu->pdev,
>    559					"Error %d registering PMU @%x\n", ret, bdf);
>    560				cpuhp_state_remove_instance_nocalls(
>    561					dwc_pcie_pmu_hp_state, &pcie_pmu->cpuhp_node);
>    562				return ret;
>    563			}
>    564	
>    565			/* Add registered PMUs and unregister them when this driver remove */
>    566			list_add(&pcie_pmu->pmu_node, &priv->pmu_nodes);
>    567		}
>    568	
>    569		return 0;
>    570	}
>    571	
>
  

Patch

diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 711f82400086..d5750cbc67c4 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -209,6 +209,13 @@  config MARVELL_CN10K_DDR_PMU
 	  Enable perf support for Marvell DDR Performance monitoring
 	  event on CN10K platform.
 
+config DWC_PCIE_PMU
+	tristate "Enable Synopsys DesignWare PCIe PMU Support"
+	depends on ARM64 || COMPILE_TEST
+	help
+	  Enable perf support for Synopsys DesignWare PCIe PMU Performance
+	  monitoring event on Yitian 710 platform.
+
 source "drivers/perf/arm_cspmu/Kconfig"
 
 source "drivers/perf/amlogic/Kconfig"
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index dabc859540ce..13a6d1b286da 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -22,5 +22,6 @@  obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o
 obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o
 obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o
 obj-$(CONFIG_ALIBABA_UNCORE_DRW_PMU) += alibaba_uncore_drw_pmu.o
+obj-$(CONFIG_DWC_PCIE_PMU) += dwc_pcie_pmu.o
 obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu/
 obj-$(CONFIG_MESON_DDR_PMU) += amlogic/
diff --git a/drivers/perf/dwc_pcie_pmu.c b/drivers/perf/dwc_pcie_pmu.c
new file mode 100644
index 000000000000..1ecb06579137
--- /dev/null
+++ b/drivers/perf/dwc_pcie_pmu.c
@@ -0,0 +1,700 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Synopsys DesignWare PCIe PMU driver
+ *
+ * Copyright (C) 2021-2023 Alibaba Inc.
+ */
+
+#include <linux/pci.h>
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/cpuhotplug.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/smp.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+
+#define DWC_PCIE_VSEC_RAS_DES_ID		0x02
+
+#define DWC_PCIE_EVENT_CNT_CTL			0x8
+/*
+ * Event Counter Data Select includes two parts:
+ * - 27-24: Group number(4-bit: 0..0x7)
+ * - 23-16: Event number(8-bit: 0..0x13) within the Group
+ *
+ * Put them togother as TRM used.
+ */
+#define DWC_PCIE_CNT_EVENT_SEL			GENMASK(27, 16)
+#define DWC_PCIE_CNT_LANE_SEL			GENMASK(11, 8)
+#define DWC_PCIE_CNT_STATUS			BIT(7)
+#define DWC_PCIE_CNT_ENABLE			GENMASK(4, 2)
+#define DWC_PCIE_PER_EVENT_OFF			0x1
+#define DWC_PCIE_PER_EVENT_ON			0x3
+#define DWC_PCIE_EVENT_CLEAR			GENMASK(1, 0)
+#define DWC_PCIE_EVENT_PER_CLEAR		0x1
+
+#define DWC_PCIE_EVENT_CNT_DATA			0xC
+
+#define DWC_PCIE_TIME_BASED_ANAL_CTL		0x10
+#define DWC_PCIE_TIME_BASED_REPORT_SEL		GENMASK(31, 24)
+#define DWC_PCIE_TIME_BASED_DURATION_SEL	GENMASK(15, 8)
+#define DWC_PCIE_DURATION_MANUAL_CTL		0x0
+#define DWC_PCIE_DURATION_1MS			0x1
+#define DWC_PCIE_DURATION_10MS			0x2
+#define DWC_PCIE_DURATION_100MS			0x3
+#define DWC_PCIE_DURATION_1S			0x4
+#define DWC_PCIE_DURATION_2S			0x5
+#define DWC_PCIE_DURATION_4S			0x6
+#define DWC_PCIE_DURATION_4US			0xff
+#define DWC_PCIE_TIME_BASED_TIMER_START		BIT(0)
+#define DWC_PCIE_TIME_BASED_CNT_ENABLE		0x1
+
+#define DWC_PCIE_TIME_BASED_ANAL_DATA_REG_LOW	0x14
+#define DWC_PCIE_TIME_BASED_ANAL_DATA_REG_HIGH	0x18
+
+/* Event attributes */
+#define DWC_PCIE_CONFIG_EVENTID			GENMASK(15, 0)
+#define DWC_PCIE_CONFIG_TYPE			GENMASK(19, 16)
+#define DWC_PCIE_CONFIG_LANE			GENMASK(27, 20)
+
+#define DWC_PCIE_EVENT_ID(event)	FIELD_GET(DWC_PCIE_CONFIG_EVENTID, (event)->attr.config)
+#define DWC_PCIE_EVENT_TYPE(event)	FIELD_GET(DWC_PCIE_CONFIG_TYPE, (event)->attr.config)
+#define DWC_PCIE_EVENT_LANE(event)	FIELD_GET(DWC_PCIE_CONFIG_LANE, (event)->attr.config)
+
+enum dwc_pcie_event_type {
+	DWC_PCIE_TYPE_INVALID,
+	DWC_PCIE_TIME_BASE_EVENT,
+	DWC_PCIE_LANE_EVENT,
+};
+
+#define DWC_PCIE_LANE_EVENT_MAX_PERIOD		(GENMASK_ULL(31, 0))
+#define DWC_PCIE_TIME_BASED_EVENT_MAX_PERIOD	(GENMASK_ULL(63, 0))
+
+
+struct dwc_pcie_pmu {
+	struct pci_dev		*pdev;		/* Root Port device */
+	u32			ras_des;	/* RAS DES capability offset */
+	u32			nr_lanes;
+
+	struct list_head	pmu_node;
+	struct hlist_node	cpuhp_node;
+	struct pmu		pmu;
+	struct perf_event	*event;
+	int			oncpu;
+};
+
+struct dwc_pcie_pmu_priv {
+	struct device *dev;
+	struct list_head pmu_nodes;
+};
+
+#define to_dwc_pcie_pmu(p) (container_of(p, struct dwc_pcie_pmu, pmu))
+
+static struct platform_device *dwc_pcie_pmu_dev;
+static int dwc_pcie_pmu_hp_state;
+
+static ssize_t cpumask_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(dev_get_drvdata(dev));
+
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(pcie_pmu->oncpu));
+}
+static DEVICE_ATTR_RO(cpumask);
+
+static struct attribute *dwc_pcie_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL
+};
+
+static struct attribute_group dwc_pcie_cpumask_attr_group = {
+	.attrs = dwc_pcie_pmu_cpumask_attrs,
+};
+
+struct dwc_pcie_format_attr {
+	struct device_attribute attr;
+	u64 field;
+	int config;
+};
+
+static ssize_t dwc_pcie_pmu_format_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct dwc_pcie_format_attr *fmt = container_of(attr, typeof(*fmt), attr);
+	int lo = __ffs(fmt->field), hi = __fls(fmt->field);
+
+	return sysfs_emit(buf, "config:%d-%d\n", lo, hi);
+}
+
+#define _dwc_pcie_format_attr(_name, _cfg, _fld)				\
+	(&((struct dwc_pcie_format_attr[]) {{					\
+		.attr = __ATTR(_name, 0444, dwc_pcie_pmu_format_show, NULL),	\
+		.config = _cfg,							\
+		.field = _fld,							\
+	}})[0].attr.attr)
+
+#define dwc_pcie_format_attr(_name, _fld)	_dwc_pcie_format_attr(_name, 0, _fld)
+
+static struct attribute *dwc_pcie_format_attrs[] = {
+	dwc_pcie_format_attr(type, DWC_PCIE_CONFIG_TYPE),
+	dwc_pcie_format_attr(eventid, DWC_PCIE_CONFIG_EVENTID),
+	dwc_pcie_format_attr(lane, DWC_PCIE_CONFIG_LANE),
+	NULL,
+};
+
+static struct attribute_group dwc_pcie_format_attrs_group = {
+	.name = "format",
+	.attrs = dwc_pcie_format_attrs,
+};
+
+struct dwc_pcie_event_attr {
+	struct device_attribute attr;
+	enum dwc_pcie_event_type type;
+	u16 eventid;
+	u8 lane;
+};
+
+static ssize_t dwc_pcie_event_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct dwc_pcie_event_attr *eattr;
+
+	eattr = container_of(attr, typeof(*eattr), attr);
+
+	if (eattr->type == DWC_PCIE_LANE_EVENT)
+		return sysfs_emit(buf, "eventid=0x%x,type=0x%x,lane=?\n",
+				  eattr->eventid, eattr->type);
+
+	return sysfs_emit(buf, "eventid=0x%x,type=0x%x\n", eattr->eventid,
+		       eattr->type);
+}
+
+#define DWC_PCIE_EVENT_ATTR(_name, _type, _eventid, _lane)		\
+	(&((struct dwc_pcie_event_attr[]) {{				\
+		.attr = __ATTR(_name, 0444, dwc_pcie_event_show, NULL),	\
+		.type = _type,						\
+		.eventid = _eventid,					\
+		.lane = _lane,						\
+	}})[0].attr.attr)
+
+#define DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(_name, _eventid)		\
+	DWC_PCIE_EVENT_ATTR(_name, DWC_PCIE_TIME_BASE_EVENT, _eventid, 0)
+#define DWC_PCIE_PMU_LANE_EVENT_ATTR(_name, _eventid)			\
+	DWC_PCIE_EVENT_ATTR(_name, DWC_PCIE_LANE_EVENT, _eventid, 0)
+
+static struct attribute *dwc_pcie_pmu_time_event_attrs[] = {
+	/* Group #0 */
+	DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(one_cycle, 0x00),
+	DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(TX_L0S, 0x01),
+	DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(RX_L0S, 0x02),
+	DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L0, 0x03),
+	DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1, 0x04),
+	DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_1, 0x05),
+	DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_2, 0x06),
+	DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(CFG_RCVRY, 0x07),
+	DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(TX_RX_L0S, 0x08),
+	DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_AUX, 0x09),
+
+	/* Group #1 */
+	DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Tx_PCIe_TLP_Data_Payload, 0x20),
+	DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Rx_PCIe_TLP_Data_Payload, 0x21),
+	DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Tx_CCIX_TLP_Data_Payload, 0x22),
+	DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Rx_CCIX_TLP_Data_Payload, 0x23),
+
+	/*
+	 * Leave it to the user to specify the lane ID to avoid generating
+	 * a list of hundreds of events.
+	 */
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_ack_dllp, 0x600),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_update_fc_dllp, 0x601),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_ack_dllp, 0x602),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_update_fc_dllp, 0x603),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_nulified_tlp, 0x604),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_nulified_tlp, 0x605),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_duplicate_tl, 0x606),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_memory_write, 0x700),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_memory_read, 0x701),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_configuration_write, 0x702),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_configuration_read, 0x703),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_io_write, 0x704),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_io_read, 0x705),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_completion_without_data, 0x706),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_completion_with_data, 0x707),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_message_tlp, 0x708),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_atomic, 0x709),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_tlp_with_prefix, 0x70A),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_memory_write, 0x70B),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_memory_read, 0x70C),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_io_write, 0x70F),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_io_read, 0x710),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_completion_without_data, 0x711),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_completion_with_data, 0x712),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_message_tlp, 0x713),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_atomic, 0x714),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_tlp_with_prefix, 0x715),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_ccix_tlp, 0x716),
+	DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_ccix_tlp, 0x717),
+
+	NULL
+};
+
+static const struct attribute_group dwc_pcie_event_attrs_group = {
+	.name = "events",
+	.attrs = dwc_pcie_pmu_time_event_attrs,
+};
+
+static const struct attribute_group *dwc_pcie_attr_groups[] = {
+	&dwc_pcie_event_attrs_group,
+	&dwc_pcie_format_attrs_group,
+	&dwc_pcie_cpumask_attr_group,
+	NULL
+};
+
+static void dwc_pcie_pmu_lane_event_enable(struct dwc_pcie_pmu *pcie_pmu,
+					   bool enable)
+{
+	struct pci_dev *pdev = pcie_pmu->pdev;
+	u16 ras_des = pcie_pmu->ras_des;
+	u32 val;
+
+	pci_read_config_dword(pdev, ras_des + DWC_PCIE_EVENT_CNT_CTL, &val);
+
+	/* Clear DWC_PCIE_CNT_ENABLE field first */
+	val &= ~DWC_PCIE_CNT_ENABLE;
+	if (enable)
+		val |= FIELD_PREP(DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_ON);
+	else
+		val |= FIELD_PREP(DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_OFF);
+
+	pci_write_config_dword(pdev, ras_des + DWC_PCIE_EVENT_CNT_CTL, val);
+}
+
+static void dwc_pcie_pmu_time_based_event_enable(struct dwc_pcie_pmu *pcie_pmu,
+					  bool enable)
+{
+	struct pci_dev *pdev = pcie_pmu->pdev;
+	u16 ras_des = pcie_pmu->ras_des;
+	u32 val;
+
+	pci_read_config_dword(pdev, ras_des + DWC_PCIE_TIME_BASED_ANAL_CTL,
+			      &val);
+
+	if (enable)
+		val |= DWC_PCIE_TIME_BASED_CNT_ENABLE;
+	else
+		val &= ~DWC_PCIE_TIME_BASED_CNT_ENABLE;
+
+	pci_write_config_dword(pdev, ras_des + DWC_PCIE_TIME_BASED_ANAL_CTL,
+			       val);
+}
+
+static u64 dwc_pcie_pmu_read_lane_event_counter(struct dwc_pcie_pmu *pcie_pmu)
+{
+	struct pci_dev *pdev = pcie_pmu->pdev;
+	u16 ras_des = pcie_pmu->ras_des;
+	u32 val;
+
+	pci_read_config_dword(pdev, ras_des + DWC_PCIE_EVENT_CNT_DATA, &val);
+
+	return val;
+}
+
+static u64 dwc_pcie_pmu_read_time_based_counter(struct dwc_pcie_pmu *pcie_pmu)
+{
+	struct pci_dev *pdev = pcie_pmu->pdev;
+	u16 ras_des = pcie_pmu->ras_des;
+	u64 count;
+	u32 val;
+
+	pci_read_config_dword(
+		pdev, ras_des + DWC_PCIE_TIME_BASED_ANAL_DATA_REG_HIGH, &val);
+	count = val;
+	count <<= 32;
+
+	pci_read_config_dword(
+		pdev, ras_des + DWC_PCIE_TIME_BASED_ANAL_DATA_REG_LOW, &val);
+
+	count += val;
+
+	return count;
+}
+
+static void dwc_pcie_pmu_event_update(struct perf_event *event)
+{
+	struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
+	u64 delta, prev, now;
+
+	do {
+		prev = local64_read(&hwc->prev_count);
+
+		if (type == DWC_PCIE_LANE_EVENT)
+			now = dwc_pcie_pmu_read_lane_event_counter(pcie_pmu);
+		else if (type == DWC_PCIE_TIME_BASE_EVENT)
+			now = dwc_pcie_pmu_read_time_based_counter(pcie_pmu);
+
+	} while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
+
+	if (type == DWC_PCIE_LANE_EVENT)
+		delta = (now - prev) & DWC_PCIE_LANE_EVENT_MAX_PERIOD;
+	else if (type == DWC_PCIE_TIME_BASE_EVENT)
+		delta = (now - prev) & DWC_PCIE_TIME_BASED_EVENT_MAX_PERIOD;
+
+	local64_add(delta, &event->count);
+}
+
+static int dwc_pcie_pmu_event_init(struct perf_event *event)
+{
+	struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
+	enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
+	struct perf_event *sibling;
+	u32 lane;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* We don't support sampling */
+	if (is_sampling_event(event))
+		return -EINVAL;
+
+	/* We cannot support task bound events */
+	if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK)
+		return -EINVAL;
+
+	if (event->group_leader != event &&
+	    !is_software_event(event->group_leader))
+		return -EINVAL;
+
+	for_each_sibling_event(sibling, event->group_leader) {
+		if (sibling->pmu != event->pmu && !is_software_event(sibling))
+			return -EINVAL;
+	}
+
+	if (type == DWC_PCIE_LANE_EVENT) {
+		lane = DWC_PCIE_EVENT_LANE(event);
+		if (lane < 0 || lane >= pcie_pmu->nr_lanes)
+			return -EINVAL;
+	}
+
+	event->cpu = pcie_pmu->oncpu;
+
+	return 0;
+}
+
+static void dwc_pcie_pmu_set_period(struct hw_perf_event *hwc)
+{
+	local64_set(&hwc->prev_count, 0);
+}
+
+static void dwc_pcie_pmu_event_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
+	enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
+
+	hwc->state = 0;
+	dwc_pcie_pmu_set_period(hwc);
+
+	if (type == DWC_PCIE_LANE_EVENT)
+		dwc_pcie_pmu_lane_event_enable(pcie_pmu, true);
+	else if (type == DWC_PCIE_TIME_BASE_EVENT)
+		dwc_pcie_pmu_time_based_event_enable(pcie_pmu, true);
+}
+
+static void dwc_pcie_pmu_event_stop(struct perf_event *event, int flags)
+{
+	struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
+	enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (event->hw.state & PERF_HES_STOPPED)
+		return;
+
+	if (type == DWC_PCIE_LANE_EVENT)
+		dwc_pcie_pmu_lane_event_enable(pcie_pmu, false);
+	else if (type == DWC_PCIE_TIME_BASE_EVENT)
+		dwc_pcie_pmu_time_based_event_enable(pcie_pmu, false);
+
+	dwc_pcie_pmu_event_update(event);
+	hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+static int dwc_pcie_pmu_event_add(struct perf_event *event, int flags)
+{
+	struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
+	struct pci_dev *pdev = pcie_pmu->pdev;
+	struct hw_perf_event *hwc = &event->hw;
+	enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
+	int event_id = DWC_PCIE_EVENT_ID(event);
+	int lane = DWC_PCIE_EVENT_LANE(event);
+	u16 ras_des = pcie_pmu->ras_des;
+	u32 ctrl;
+
+	/* Only one counter and it is in use */
+	if (pcie_pmu->event)
+		return -ENOSPC;
+
+	pcie_pmu->event = event;
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+	if (type == DWC_PCIE_LANE_EVENT) {
+		/* EVENT_COUNTER_DATA_REG needs clear manually */
+		ctrl = FIELD_PREP(DWC_PCIE_CNT_EVENT_SEL, event_id) |
+			FIELD_PREP(DWC_PCIE_CNT_LANE_SEL, lane) |
+			FIELD_PREP(DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_OFF) |
+			FIELD_PREP(DWC_PCIE_EVENT_CLEAR, DWC_PCIE_EVENT_PER_CLEAR);
+		pci_write_config_dword(pdev, ras_des + DWC_PCIE_EVENT_CNT_CTL,
+				       ctrl);
+	} else if (type == DWC_PCIE_TIME_BASE_EVENT) {
+		/*
+		 * TIME_BASED_ANAL_DATA_REG is a 64 bit register, we can safely
+		 * use it with any manually controlled duration. And it is
+		 * cleared when next measurement starts.
+		 */
+		ctrl = FIELD_PREP(DWC_PCIE_TIME_BASED_REPORT_SEL, event_id) |
+			FIELD_PREP(DWC_PCIE_TIME_BASED_DURATION_SEL,
+				   DWC_PCIE_DURATION_MANUAL_CTL) |
+			DWC_PCIE_TIME_BASED_CNT_ENABLE;
+		pci_write_config_dword(
+			pdev, ras_des + DWC_PCIE_TIME_BASED_ANAL_CTL, ctrl);
+	}
+
+	if (flags & PERF_EF_START)
+		dwc_pcie_pmu_event_start(event, PERF_EF_RELOAD);
+
+	perf_event_update_userpage(event);
+
+	return 0;
+}
+
+static void dwc_pcie_pmu_event_del(struct perf_event *event, int flags)
+{
+	struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
+
+	dwc_pcie_pmu_event_stop(event, flags | PERF_EF_UPDATE);
+	perf_event_update_userpage(event);
+	pcie_pmu->event = NULL;
+}
+
+static int __dwc_pcie_pmu_probe(struct dwc_pcie_pmu_priv *priv)
+{
+	struct pci_dev *pdev = NULL;
+	struct dwc_pcie_pmu *pcie_pmu;
+	char *name;
+	u32 bdf;
+	int ret;
+
+	INIT_LIST_HEAD(&priv->pmu_nodes);
+
+	/* Match the rootport with VSEC_RAS_DES_ID, and register a PMU for it */
+	for_each_pci_dev(pdev) {
+		u16 vsec;
+		u32 val;
+
+		if (!(pci_is_pcie(pdev) &&
+		      pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT))
+			continue;
+
+		vsec = pci_find_vsec_capability(pdev, PCI_VENDOR_ID_ALIBABA,
+						DWC_PCIE_VSEC_RAS_DES_ID);
+		if (!vsec)
+			continue;
+
+		pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val);
+		if (PCI_VNDR_HEADER_REV(val) != 0x04 ||
+		    PCI_VNDR_HEADER_LEN(val) != 0x100)
+			continue;
+		pci_dbg(pdev,
+			"Detected PCIe Vendor-Specific Extended Capability RAS DES\n");
+
+		bdf = PCI_DEVID(pdev->bus->number, pdev->devfn);
+		name = devm_kasprintf(priv->dev, GFP_KERNEL, "dwc_rootport_%x",
+				      bdf);
+		if (!name)
+			return -ENOMEM;
+
+		/* All checks passed, go go go */
+		pcie_pmu = devm_kzalloc(&pdev->dev, sizeof(*pcie_pmu), GFP_KERNEL);
+		if (!pcie_pmu) {
+			pci_dev_put(pdev);
+			return -ENOMEM;
+		}
+
+		pcie_pmu->pdev = pdev;
+		pcie_pmu->ras_des = vsec;
+		pcie_pmu->nr_lanes = pcie_get_width_cap(pdev);
+		pcie_pmu->pmu = (struct pmu){
+			.module		= THIS_MODULE,
+			.attr_groups	= dwc_pcie_attr_groups,
+			.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
+			.task_ctx_nr	= perf_invalid_context,
+			.event_init	= dwc_pcie_pmu_event_init,
+			.add		= dwc_pcie_pmu_event_add,
+			.del		= dwc_pcie_pmu_event_del,
+			.start		= dwc_pcie_pmu_event_start,
+			.stop		= dwc_pcie_pmu_event_stop,
+			.read		= dwc_pcie_pmu_event_update,
+		};
+
+		/* Add this instance to the list used by the offline callback */
+		ret = cpuhp_state_add_instance(dwc_pcie_pmu_hp_state,
+					       &pcie_pmu->cpuhp_node);
+		if (ret) {
+			pci_err(pcie_pmu->pdev,
+				"Error %d registering hotplug @%x\n", ret, bdf);
+			return ret;
+		}
+		ret = perf_pmu_register(&pcie_pmu->pmu, name, -1);
+		if (ret) {
+			pci_err(pcie_pmu->pdev,
+				"Error %d registering PMU @%x\n", ret, bdf);
+			cpuhp_state_remove_instance_nocalls(
+				dwc_pcie_pmu_hp_state, &pcie_pmu->cpuhp_node);
+			return ret;
+		}
+
+		/* Add registered PMUs and unregister them when this driver remove */
+		list_add(&pcie_pmu->pmu_node, &priv->pmu_nodes);
+	}
+
+	return 0;
+}
+
+static int dwc_pcie_pmu_remove(struct platform_device *pdev)
+{
+	struct dwc_pcie_pmu_priv *priv = platform_get_drvdata(pdev);
+	struct dwc_pcie_pmu *pcie_pmu;
+
+	list_for_each_entry(pcie_pmu, &priv->pmu_nodes, pmu_node) {
+		cpuhp_state_remove_instance(dwc_pcie_pmu_hp_state,
+					    &pcie_pmu->cpuhp_node);
+		perf_pmu_unregister(&pcie_pmu->pmu);
+	}
+
+	return 0;
+}
+
+static int dwc_pcie_pmu_probe(struct platform_device *pdev)
+{
+	struct dwc_pcie_pmu_priv *priv;
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->dev = &pdev->dev;
+	platform_set_drvdata(pdev, priv);
+
+	/* If one PMU registration fails, remove all. */
+	if (__dwc_pcie_pmu_probe(priv))
+		dwc_pcie_pmu_remove(pdev);
+
+	return 0;
+}
+
+static void dwc_pcie_pmu_migrate(struct dwc_pcie_pmu *pcie_pmu, unsigned int cpu)
+{
+	/* This PMU does NOT support interrupt, just migrate context. */
+	perf_pmu_migrate_context(&pcie_pmu->pmu, pcie_pmu->oncpu, cpu);
+	pcie_pmu->oncpu = cpu;
+}
+
+static int dwc_pcie_pmu_online_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
+{
+	struct dwc_pcie_pmu *pcie_pmu;
+	struct pci_dev *pdev;
+	int node;
+
+	pcie_pmu = hlist_entry_safe(cpuhp_node, struct dwc_pcie_pmu, cpuhp_node);
+	pdev = pcie_pmu->pdev;
+	node = dev_to_node(&pdev->dev);
+
+	if (node != NUMA_NO_NODE && cpu_to_node(pcie_pmu->oncpu) != node &&
+	    cpu_to_node(cpu) == node)
+		dwc_pcie_pmu_migrate(pcie_pmu, cpu);
+
+	return 0;
+}
+
+static int dwc_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
+{
+	struct dwc_pcie_pmu *pcie_pmu;
+	struct pci_dev *pdev;
+	int node;
+	cpumask_t mask;
+	unsigned int target;
+
+	pcie_pmu = hlist_entry_safe(cpuhp_node, struct dwc_pcie_pmu, cpuhp_node);
+	if (cpu != pcie_pmu->oncpu)
+		return 0;
+
+	pdev = pcie_pmu->pdev;
+	node = dev_to_node(&pdev->dev);
+	if (cpumask_and(&mask, cpumask_of_node(node), cpu_online_mask) &&
+	    cpumask_andnot(&mask, &mask, cpumask_of(cpu)))
+		target = cpumask_any(&mask);
+	else
+		target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target < nr_cpu_ids)
+		dwc_pcie_pmu_migrate(pcie_pmu, target);
+
+	return 0;
+}
+
+static struct platform_driver dwc_pcie_pmu_driver = {
+	.probe = dwc_pcie_pmu_probe,
+	.remove = dwc_pcie_pmu_remove,
+	.driver = {.name = "dwc_pcie_pmu",},
+};
+
+static int __init dwc_pcie_pmu_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+				      "perf/dwc_pcie_pmu:online",
+				      dwc_pcie_pmu_online_cpu,
+				      dwc_pcie_pmu_offline_cpu);
+	if (ret < 0)
+		return ret;
+
+	dwc_pcie_pmu_hp_state = ret;
+
+	ret = platform_driver_register(&dwc_pcie_pmu_driver);
+	if (ret) {
+		cpuhp_remove_multi_state(dwc_pcie_pmu_hp_state);
+		return ret;
+	}
+
+	dwc_pcie_pmu_dev = platform_device_register_simple(
+				"dwc_pcie_pmu", PLATFORM_DEVID_NONE, NULL, 0);
+	if (IS_ERR(dwc_pcie_pmu_dev)) {
+		platform_driver_unregister(&dwc_pcie_pmu_driver);
+		return PTR_ERR(dwc_pcie_pmu_dev);
+	}
+
+	return 0;
+}
+
+static void __exit dwc_pcie_pmu_exit(void)
+{
+	platform_device_unregister(dwc_pcie_pmu_dev);
+	platform_driver_unregister(&dwc_pcie_pmu_driver);
+}
+
+module_init(dwc_pcie_pmu_init);
+module_exit(dwc_pcie_pmu_exit);
+
+MODULE_DESCRIPTION("PMU driver for DesignWare Cores PCI Express Controller");
+MODULE_AUTHOR("Shuai xue <xueshuai@linux.alibaba.com>");
+MODULE_AUTHOR("Wen Cheng <yinxuan_cw@linux.alibaba.com>");
+MODULE_LICENSE("GPL v2");