[2/3] perf/amlogic: Fix large number of counter issue

Message ID 20230209115403.521868-2-jiucheng.xu@amlogic.com
State New
Headers
Series [1/3] perf/amlogic: Fix config1/config2 parsing issue |

Commit Message

Jiucheng Xu Feb. 9, 2023, 11:54 a.m. UTC
  When use 1ms interval, very large number of counter happens
once in a while as below:

25.968654513 281474976710655.84 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
26.118657346 281474976710655.88 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
26.180137180 281474976710655.66 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/

Root cause is the race between irq handler
and pmu.read callback. Use spin lock to protect the sw&hw
counters.

Signed-off-by: Jiucheng Xu <jiucheng.xu@amlogic.com>
---
 drivers/perf/amlogic/meson_ddr_pmu_core.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)
  

Comments

Will Deacon March 27, 2023, 2:10 p.m. UTC | #1
On Thu, Feb 09, 2023 at 07:54:02PM +0800, Jiucheng Xu wrote:
> When use 1ms interval, very large number of counter happens
> once in a while as below:
> 
> 25.968654513 281474976710655.84 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
> 26.118657346 281474976710655.88 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
> 26.180137180 281474976710655.66 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
> 
> Root cause is the race between irq handler
> and pmu.read callback. Use spin lock to protect the sw&hw
> counters.
> 
> Signed-off-by: Jiucheng Xu <jiucheng.xu@amlogic.com>
> ---
>  drivers/perf/amlogic/meson_ddr_pmu_core.c | 10 +++++++++-
>  1 file changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c
> index 0b24dee1ed3c..9b2e5d5c0626 100644
> --- a/drivers/perf/amlogic/meson_ddr_pmu_core.c
> +++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c
> @@ -14,6 +14,7 @@
>  #include <linux/perf_event.h>
>  #include <linux/platform_device.h>
>  #include <linux/printk.h>
> +#include <linux/spinlock.h>
>  #include <linux/sysfs.h>
>  #include <linux/types.h>
>  
> @@ -23,6 +24,7 @@ struct ddr_pmu {
>  	struct pmu pmu;
>  	struct dmc_info info;
>  	struct dmc_counter counters;	/* save counters from hw */
> +	spinlock_t lock;		/* protect hw/sw counter */
>  	bool pmu_enabled;
>  	struct device *dev;
>  	char *name;
> @@ -92,10 +94,12 @@ static void meson_ddr_perf_event_update(struct perf_event *event)
>  	int idx;
>  	int chann_nr = pmu->info.hw_info->chann_nr;
>  
> +	spin_lock(&pmu->lock);

Why doesn't this need the _irqsave() variant if we're racing with the irq
handler?

Will
  
Jiucheng Xu March 28, 2023, 2:29 a.m. UTC | #2
My Amlogic email box has some issues. Use my personal email 
<jiucheng.xu@163.com> to reply.

On 2023/3/27 22:10, Will Deacon wrote:
> [ EXTERNAL EMAIL ]
>
> On Thu, Feb 09, 2023 at 07:54:02PM +0800, Jiucheng Xu wrote:
>> When use 1ms interval, very large number of counter happens
>> once in a while as below:
>>
>> 25.968654513 281474976710655.84 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
>> 26.118657346 281474976710655.88 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
>> 26.180137180 281474976710655.66 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
>>
>> Root cause is the race between irq handler
>> and pmu.read callback. Use spin lock to protect the sw&hw
>> counters.
>>
>> Signed-off-by: Jiucheng Xu <jiucheng.xu@amlogic.com>
>> ---
>>   drivers/perf/amlogic/meson_ddr_pmu_core.c | 10 +++++++++-
>>   1 file changed, 9 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c
>> index 0b24dee1ed3c..9b2e5d5c0626 100644
>> --- a/drivers/perf/amlogic/meson_ddr_pmu_core.c
>> +++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c
>> @@ -14,6 +14,7 @@
>>   #include <linux/perf_event.h>
>>   #include <linux/platform_device.h>
>>   #include <linux/printk.h>
>> +#include <linux/spinlock.h>
>>   #include <linux/sysfs.h>
>>   #include <linux/types.h>
>>   
>> @@ -23,6 +24,7 @@ struct ddr_pmu {
>>   	struct pmu pmu;
>>   	struct dmc_info info;
>>   	struct dmc_counter counters;	/* save counters from hw */
>> +	spinlock_t lock;		/* protect hw/sw counter */
>>   	bool pmu_enabled;
>>   	struct device *dev;
>>   	char *name;
>> @@ -92,10 +94,12 @@ static void meson_ddr_perf_event_update(struct perf_event *event)
>>   	int idx;
>>   	int chann_nr = pmu->info.hw_info->chann_nr;
>>   
>> +	spin_lock(&pmu->lock);
> Why doesn't this need the _irqsave() variant if we're racing with the irq
> handler?
>
> Will
I think meson_ddr_perf_event_update function is called with hard irq off.
So update function couldn't be interrupted by irq handler. Right?

Thanks,
Jiucheng
>
  
Will Deacon March 28, 2023, 11:55 a.m. UTC | #3
On Tue, Mar 28, 2023 at 10:29:04AM +0800, Jiucheng Xu wrote:
> 
> My Amlogic email box has some issues. Use my personal email
> <jiucheng.xu@163.com> to reply.
> 
> On 2023/3/27 22:10, Will Deacon wrote:
> > [ EXTERNAL EMAIL ]
> > 
> > On Thu, Feb 09, 2023 at 07:54:02PM +0800, Jiucheng Xu wrote:
> > > When use 1ms interval, very large number of counter happens
> > > once in a while as below:
> > > 
> > > 25.968654513 281474976710655.84 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
> > > 26.118657346 281474976710655.88 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
> > > 26.180137180 281474976710655.66 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
> > > 
> > > Root cause is the race between irq handler
> > > and pmu.read callback. Use spin lock to protect the sw&hw
> > > counters.
> > > 
> > > Signed-off-by: Jiucheng Xu <jiucheng.xu@amlogic.com>
> > > ---
> > >   drivers/perf/amlogic/meson_ddr_pmu_core.c | 10 +++++++++-
> > >   1 file changed, 9 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c
> > > index 0b24dee1ed3c..9b2e5d5c0626 100644
> > > --- a/drivers/perf/amlogic/meson_ddr_pmu_core.c
> > > +++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c
> > > @@ -14,6 +14,7 @@
> > >   #include <linux/perf_event.h>
> > >   #include <linux/platform_device.h>
> > >   #include <linux/printk.h>
> > > +#include <linux/spinlock.h>
> > >   #include <linux/sysfs.h>
> > >   #include <linux/types.h>
> > > @@ -23,6 +24,7 @@ struct ddr_pmu {
> > >   	struct pmu pmu;
> > >   	struct dmc_info info;
> > >   	struct dmc_counter counters;	/* save counters from hw */
> > > +	spinlock_t lock;		/* protect hw/sw counter */
> > >   	bool pmu_enabled;
> > >   	struct device *dev;
> > >   	char *name;
> > > @@ -92,10 +94,12 @@ static void meson_ddr_perf_event_update(struct perf_event *event)
> > >   	int idx;
> > >   	int chann_nr = pmu->info.hw_info->chann_nr;
> > > +	spin_lock(&pmu->lock);
> > Why doesn't this need the _irqsave() variant if we're racing with the irq
> > handler?
> > 
> > Will
> I think meson_ddr_perf_event_update function is called with hard irq off.
> So update function couldn't be interrupted by irq handler. Right?

I'm just confused about the race, then. The commit message says you have a
race between an irq handler and a callback, which you fix with a spinlock
that isn't irq safe. So either the race is real and the lock needs to be
irqsafe, or the race is something else entirely, no?

Will
  

Patch

diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c
index 0b24dee1ed3c..9b2e5d5c0626 100644
--- a/drivers/perf/amlogic/meson_ddr_pmu_core.c
+++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c
@@ -14,6 +14,7 @@ 
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>
 #include <linux/printk.h>
+#include <linux/spinlock.h>
 #include <linux/sysfs.h>
 #include <linux/types.h>
 
@@ -23,6 +24,7 @@  struct ddr_pmu {
 	struct pmu pmu;
 	struct dmc_info info;
 	struct dmc_counter counters;	/* save counters from hw */
+	spinlock_t lock;		/* protect hw/sw counter */
 	bool pmu_enabled;
 	struct device *dev;
 	char *name;
@@ -92,10 +94,12 @@  static void meson_ddr_perf_event_update(struct perf_event *event)
 	int idx;
 	int chann_nr = pmu->info.hw_info->chann_nr;
 
+	spin_lock(&pmu->lock);
 	/* get the remain counters in register. */
 	pmu->info.hw_info->get_counters(&pmu->info, &dc);
 
 	ddr_cnt_addition(&sum_dc, &pmu->counters, &dc, chann_nr);
+	spin_unlock(&pmu->lock);
 
 	switch (event->attr.config) {
 	case ALL_CHAN_COUNTER_ID:
@@ -355,6 +359,7 @@  static irqreturn_t dmc_irq_handler(int irq, void *dev_id)
 
 	pmu = dmc_info_to_pmu(info);
 
+	spin_lock(&pmu->lock);
 	if (info->hw_info->irq_handler(info, &counters) != 0)
 		goto out;
 
@@ -372,6 +377,8 @@  static irqreturn_t dmc_irq_handler(int irq, void *dev_id)
 		 * it in ISR to support continue mode.
 		 */
 		info->hw_info->enable(info);
+out:
+	spin_unlock(&pmu->lock);
 
 	dev_dbg(pmu->dev, "counts: %llu %llu %llu, %llu, %llu, %llu\t\t"
 			"sum: %llu %llu %llu, %llu, %llu, %llu\n",
@@ -388,7 +395,7 @@  static irqreturn_t dmc_irq_handler(int irq, void *dev_id)
 			pmu->counters.channel_cnt[1],
 			pmu->counters.channel_cnt[2],
 			pmu->counters.channel_cnt[3]);
-out:
+
 	return IRQ_HANDLED;
 }
 
@@ -539,6 +546,7 @@  int meson_ddr_pmu_create(struct platform_device *pdev)
 	pmu->name = name;
 	pmu->dev = &pdev->dev;
 	pmu->pmu_enabled = false;
+	spin_lock_init(&pmu->lock);
 
 	platform_set_drvdata(pdev, pmu);