[V3] thermal/core/power_allocator: avoid thermal cdev can not be reset

Message ID 20230320095620.7480-1-di.shen@unisoc.com
State New
Headers
Series [V3] thermal/core/power_allocator: avoid thermal cdev can not be reset |

Commit Message

Di Shen March 20, 2023, 9:56 a.m. UTC
  Commit <0952177f2a1f>(thermal/core/power_allocator: Update once
cooling devices when temp is low) adds a update flag to avoid
the thermal event is triggered when there is no need, and
thermal cdev would be update once when temperature is low.

But when the trips are writable, and switch_on_temp is set
to be a higher value, the cooling device state may not be
reset to 0, because last_temperature is smaller than the
switch_on_temp.

For example:
First:
switch_on_temp=70 control_temp=85;
Then userspace change the trip_temp:
switch_on_temp=45 control_temp=55 cur_temp=54

Then userspace reset the trip_temp:
switch_on_temp=70 control_temp=85 cur_temp=57 last_temp=54

At this time, the cooling device state should be reset to 0.
However, because cur_temp(57) < switch_on_temp(70)
last_temp(54) < switch_on_temp(70)  ---->  update = false,
update is false, the cooling device state can not be reset.

This patch adds a function thermal_cdev_needs_update() to
renew the update flag value only when the trips are writable,
so that thermal cdev->state can be reset after switch_on_temp
changed from low to high.

Fixes: <0952177f2a1f> (thermal/core/power_allocator: Update once cooling devices when temp is low)
Signed-off-by: Di Shen <di.shen@unisoc.com>

---
V3:
- Add fix tag.

V2:
- Compared to v1, do not revert.

- Add a variable(last_switch_on_temp) in power_allocator_params
  to record the last switch_on_temp value.

- Adds a function to renew the update flag and update the
  last_switch_on_temp when thermal trips are writable.

V1:
- Revert commit 0952177f2a1f.
---
---
 drivers/thermal/gov_power_allocator.c | 39 ++++++++++++++++++++++-----
 1 file changed, 33 insertions(+), 6 deletions(-)
  

Comments

Lukasz Luba March 20, 2023, 12:24 p.m. UTC | #1
On 3/20/23 09:56, Di Shen wrote:
> Commit <0952177f2a1f>(thermal/core/power_allocator: Update once
> cooling devices when temp is low) adds a update flag to avoid
> the thermal event is triggered when there is no need, and
> thermal cdev would be update once when temperature is low.
> 
> But when the trips are writable, and switch_on_temp is set
> to be a higher value, the cooling device state may not be
> reset to 0, because last_temperature is smaller than the
> switch_on_temp.
> 
> For example:
> First:
> switch_on_temp=70 control_temp=85;
> Then userspace change the trip_temp:
> switch_on_temp=45 control_temp=55 cur_temp=54
> 
> Then userspace reset the trip_temp:
> switch_on_temp=70 control_temp=85 cur_temp=57 last_temp=54
> 
> At this time, the cooling device state should be reset to 0.
> However, because cur_temp(57) < switch_on_temp(70)
> last_temp(54) < switch_on_temp(70)  ---->  update = false,
> update is false, the cooling device state can not be reset.
> 
> This patch adds a function thermal_cdev_needs_update() to
> renew the update flag value only when the trips are writable,
> so that thermal cdev->state can be reset after switch_on_temp
> changed from low to high.
> 
> Fixes: <0952177f2a1f> (thermal/core/power_allocator: Update once cooling devices when temp is low)
> Signed-off-by: Di Shen <di.shen@unisoc.com>
> 
> ---
> V3:
> - Add fix tag.
> 
> V2:
> - Compared to v1, do not revert.
> 
> - Add a variable(last_switch_on_temp) in power_allocator_params
>    to record the last switch_on_temp value.
> 
> - Adds a function to renew the update flag and update the
>    last_switch_on_temp when thermal trips are writable.
> 
> V1:
> - Revert commit 0952177f2a1f.
> ---
> ---
>   drivers/thermal/gov_power_allocator.c | 39 ++++++++++++++++++++++-----
>   1 file changed, 33 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c
> index 0eaf1527d3e3..c9e1f3b15f15 100644
> --- a/drivers/thermal/gov_power_allocator.c
> +++ b/drivers/thermal/gov_power_allocator.c
> @@ -59,6 +59,8 @@ static inline s64 div_frac(s64 x, s64 y)
>    *			governor switches on when this trip point is crossed.
>    *			If the thermal zone only has one passive trip point,
>    *			@trip_switch_on should be INVALID_TRIP.
> + * @last_switch_on_temp:Record the last switch_on_temp only when trips
> +			are writable.
>    * @trip_max_desired_temperature:	last passive trip point of the thermal
>    *					zone.  The temperature we are
>    *					controlling for.
> @@ -70,6 +72,9 @@ struct power_allocator_params {
>   	s64 err_integral;
>   	s32 prev_err;
>   	int trip_switch_on;
> +#ifdef CONFIG_THERMAL_WRITABLE_TRIPS
> +	int last_switch_on_temp;
> +#endif
>   	int trip_max_desired_temperature;
>   	u32 sustainable_power;
>   };
> @@ -554,6 +559,25 @@ static void get_governor_trips(struct thermal_zone_device *tz,
>   	}
>   }
>   
> +#ifdef CONFIG_THERMAL_WRITABLE_TRIPS
> +static bool thermal_cdev_needs_update(struct thermal_zone_device *tz, int switch_on_temp)
> +{
> +	bool update;
> +	struct power_allocator_params *params = tz->governor_data;
> +	int last_switch_on_temp = params->last_switch_on_temp;
> +
> +	update = (tz->last_temperature >= last_switch_on_temp);
> +	params->last_switch_on_temp = switch_on_temp;
> +
> +	return update;
> +}
> +#else
> +static inline bool thermal_cdev_needs_update(struct thermal_zone_device *tz, int switch_on_temp)
> +{
> +	return false;
> +}
> +#endif
> +
>   static void reset_pid_controller(struct power_allocator_params *params)
>   {
>   	params->err_integral = 0;
> @@ -709,12 +733,15 @@ static int power_allocator_throttle(struct thermal_zone_device *tz, int trip_id)
>   		return 0;
>   
>   	ret = __thermal_zone_get_trip(tz, params->trip_switch_on, &trip);
> -	if (!ret && (tz->temperature < trip.temperature)) {
> -		update = (tz->last_temperature >= trip.temperature);
> -		tz->passive = 0;
> -		reset_pid_controller(params);
> -		allow_maximum_power(tz, update);
> -		return 0;
> +	if (!ret) {
> +		update = thermal_cdev_needs_update(tz, trip.temperature);
> +		if (tz->temperature < trip.temperature) {
> +			update |= (tz->last_temperature >= trip.temperature);
> +			tz->passive = 0;
> +			reset_pid_controller(params);
> +			allow_maximum_power(tz, update);
> +			return 0;
> +		}
>   	}
>   
>   	tz->passive = 1;


Thanks for the patch. The code looks good. The initial value of
'last_switch_on_temp' would be set to 0. It won't harm us because it
will get the proper value later.

Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
  
Di Shen April 10, 2023, 2:09 a.m. UTC | #2
Hi Lukasz,
Could you please apply this patch if there's no more comment? Thank you.

Best regards,
Di

On Mon, Mar 20, 2023 at 8:29 PM Lukasz Luba <lukasz.luba@arm.com> wrote:
>
>
>
> On 3/20/23 09:56, Di Shen wrote:
> > Commit <0952177f2a1f>(thermal/core/power_allocator: Update once
> > cooling devices when temp is low) adds a update flag to avoid
> > the thermal event is triggered when there is no need, and
> > thermal cdev would be update once when temperature is low.
> >
> > But when the trips are writable, and switch_on_temp is set
> > to be a higher value, the cooling device state may not be
> > reset to 0, because last_temperature is smaller than the
> > switch_on_temp.
> >
> > For example:
> > First:
> > switch_on_temp=70 control_temp=85;
> > Then userspace change the trip_temp:
> > switch_on_temp=45 control_temp=55 cur_temp=54
> >
> > Then userspace reset the trip_temp:
> > switch_on_temp=70 control_temp=85 cur_temp=57 last_temp=54
> >
> > At this time, the cooling device state should be reset to 0.
> > However, because cur_temp(57) < switch_on_temp(70)
> > last_temp(54) < switch_on_temp(70)  ---->  update = false,
> > update is false, the cooling device state can not be reset.
> >
> > This patch adds a function thermal_cdev_needs_update() to
> > renew the update flag value only when the trips are writable,
> > so that thermal cdev->state can be reset after switch_on_temp
> > changed from low to high.
> >
> > Fixes: <0952177f2a1f> (thermal/core/power_allocator: Update once cooling devices when temp is low)
> > Signed-off-by: Di Shen <di.shen@unisoc.com>
> >
> > ---
> > V3:
> > - Add fix tag.
> >
> > V2:
> > - Compared to v1, do not revert.
> >
> > - Add a variable(last_switch_on_temp) in power_allocator_params
> >    to record the last switch_on_temp value.
> >
> > - Adds a function to renew the update flag and update the
> >    last_switch_on_temp when thermal trips are writable.
> >
> > V1:
> > - Revert commit 0952177f2a1f.
> > ---
> > ---
> >   drivers/thermal/gov_power_allocator.c | 39 ++++++++++++++++++++++-----
> >   1 file changed, 33 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c
> > index 0eaf1527d3e3..c9e1f3b15f15 100644
> > --- a/drivers/thermal/gov_power_allocator.c
> > +++ b/drivers/thermal/gov_power_allocator.c
> > @@ -59,6 +59,8 @@ static inline s64 div_frac(s64 x, s64 y)
> >    *                  governor switches on when this trip point is crossed.
> >    *                  If the thermal zone only has one passive trip point,
> >    *                  @trip_switch_on should be INVALID_TRIP.
> > + * @last_switch_on_temp:Record the last switch_on_temp only when trips
> > +                     are writable.
> >    * @trip_max_desired_temperature:   last passive trip point of the thermal
> >    *                                  zone.  The temperature we are
> >    *                                  controlling for.
> > @@ -70,6 +72,9 @@ struct power_allocator_params {
> >       s64 err_integral;
> >       s32 prev_err;
> >       int trip_switch_on;
> > +#ifdef CONFIG_THERMAL_WRITABLE_TRIPS
> > +     int last_switch_on_temp;
> > +#endif
> >       int trip_max_desired_temperature;
> >       u32 sustainable_power;
> >   };
> > @@ -554,6 +559,25 @@ static void get_governor_trips(struct thermal_zone_device *tz,
> >       }
> >   }
> >
> > +#ifdef CONFIG_THERMAL_WRITABLE_TRIPS
> > +static bool thermal_cdev_needs_update(struct thermal_zone_device *tz, int switch_on_temp)
> > +{
> > +     bool update;
> > +     struct power_allocator_params *params = tz->governor_data;
> > +     int last_switch_on_temp = params->last_switch_on_temp;
> > +
> > +     update = (tz->last_temperature >= last_switch_on_temp);
> > +     params->last_switch_on_temp = switch_on_temp;
> > +
> > +     return update;
> > +}
> > +#else
> > +static inline bool thermal_cdev_needs_update(struct thermal_zone_device *tz, int switch_on_temp)
> > +{
> > +     return false;
> > +}
> > +#endif
> > +
> >   static void reset_pid_controller(struct power_allocator_params *params)
> >   {
> >       params->err_integral = 0;
> > @@ -709,12 +733,15 @@ static int power_allocator_throttle(struct thermal_zone_device *tz, int trip_id)
> >               return 0;
> >
> >       ret = __thermal_zone_get_trip(tz, params->trip_switch_on, &trip);
> > -     if (!ret && (tz->temperature < trip.temperature)) {
> > -             update = (tz->last_temperature >= trip.temperature);
> > -             tz->passive = 0;
> > -             reset_pid_controller(params);
> > -             allow_maximum_power(tz, update);
> > -             return 0;
> > +     if (!ret) {
> > +             update = thermal_cdev_needs_update(tz, trip.temperature);
> > +             if (tz->temperature < trip.temperature) {
> > +                     update |= (tz->last_temperature >= trip.temperature);
> > +                     tz->passive = 0;
> > +                     reset_pid_controller(params);
> > +                     allow_maximum_power(tz, update);
> > +                     return 0;
> > +             }
> >       }
> >
> >       tz->passive = 1;
>
>
> Thanks for the patch. The code looks good. The initial value of
> 'last_switch_on_temp' would be set to 0. It won't harm us because it
> will get the proper value later.
>
> Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
  
Daniel Lezcano April 10, 2023, 7:51 p.m. UTC | #3
On 10/04/2023 04:09, Di Shen wrote:
> Hi Lukasz,
> Could you please apply this patch if there's no more comment? Thank you.

Hi,

I take care of applying the patches. Give me some time to read the changes.

Thanks
   -- Daniel
  
Lukasz Luba April 11, 2023, 7:17 a.m. UTC | #4
On 4/10/23 20:51, Daniel Lezcano wrote:
> On 10/04/2023 04:09, Di Shen wrote:
>> Hi Lukasz,
>> Could you please apply this patch if there's no more comment? Thank you.
> 
> Hi,
> 
> I take care of applying the patches. Give me some time to read the changes.
> 
> Thanks
>    -- Daniel
> 

Thank you Daniel!

Regards,
Lukasz
  
Di Shen April 13, 2023, 4:51 a.m. UTC | #5
Thank you Daniel. Any comments would be appreciated!

Best regards,
Di


On Tue, Apr 11, 2023 at 3:51 AM Daniel Lezcano
<daniel.lezcano@linaro.org> wrote:
>
> On 10/04/2023 04:09, Di Shen wrote:
> > Hi Lukasz,
> > Could you please apply this patch if there's no more comment? Thank you.
>
> Hi,
>
> I take care of applying the patches. Give me some time to read the changes.
>
> Thanks
>    -- Daniel
>
> --
> <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
>
> Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
> <http://twitter.com/#!/linaroorg> Twitter |
> <http://www.linaro.org/linaro-blog/> Blog
>
  
Daniel Lezcano April 13, 2023, 7:37 a.m. UTC | #6
On 13/04/2023 06:51, Di Shen wrote:
> Thank you Daniel. Any comments would be appreciated!

Still thinking about the changes...

For my understanding, why do you change the 'switch_on' trip point on 
the fly ?



> On Tue, Apr 11, 2023 at 3:51 AM Daniel Lezcano
> <daniel.lezcano@linaro.org> wrote:
>>
>> On 10/04/2023 04:09, Di Shen wrote:
>>> Hi Lukasz,
>>> Could you please apply this patch if there's no more comment? Thank you.
>>
>> Hi,
>>
>> I take care of applying the patches. Give me some time to read the changes.
>>
>> Thanks
>>     -- Daniel
>>
>> --
>> <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
>>
>> Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
>> <http://twitter.com/#!/linaroorg> Twitter |
>> <http://www.linaro.org/linaro-blog/> Blog
>>
  
Di Shen April 13, 2023, 8:40 a.m. UTC | #7
We have discussed this question in patch-v1:
https://lore.kernel.org/all/f6aaa5f1-495d-a158-14d8-ddb2bffbd9c2@arm.com/

Simply put, we use the trip_temp in the Android System; set different
trip_temp for thermal control of different scenarios.

On Thu, Apr 13, 2023 at 3:37 PM Daniel Lezcano
<daniel.lezcano@linaro.org> wrote:
>
> On 13/04/2023 06:51, Di Shen wrote:
> > Thank you Daniel. Any comments would be appreciated!
>
> Still thinking about the changes...
>
> For my understanding, why do you change the 'switch_on' trip point on
> the fly ?
>
>
>
> > On Tue, Apr 11, 2023 at 3:51 AM Daniel Lezcano
> > <daniel.lezcano@linaro.org> wrote:
> >>
> >> On 10/04/2023 04:09, Di Shen wrote:
> >>> Hi Lukasz,
> >>> Could you please apply this patch if there's no more comment? Thank you.
> >>
> >> Hi,
> >>
> >> I take care of applying the patches. Give me some time to read the changes.
> >>
> >> Thanks
> >>     -- Daniel
> >>
> >> --
> >> <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
> >>
> >> Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
> >> <http://twitter.com/#!/linaroorg> Twitter |
> >> <http://www.linaro.org/linaro-blog/> Blog
> >>
>
> --
> <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
>
> Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
> <http://twitter.com/#!/linaroorg> Twitter |
> <http://www.linaro.org/linaro-blog/> Blog
>
  
Daniel Lezcano April 14, 2023, 11:12 a.m. UTC | #8
On 13/04/2023 10:40, Di Shen wrote:
> We have discussed this question in patch-v1:
> https://lore.kernel.org/all/f6aaa5f1-495d-a158-14d8-ddb2bffbd9c2@arm.com/
> 
> Simply put, we use the trip_temp in the Android System; set different
> trip_temp for thermal control of different scenarios.

The changes are dealing with the trip points and trying to detect the 
threshold. That part should be handled in the thermal core or thermal 
trip side, not in the governor.

AFAICT, if a trip point is changed, then the power allocator should be 
reset, including the cdev state.

It would be more convenient to add an ops to the governor ops structure 
to reset the governor and then call it when a trip point is changed in 
thermal_zone_set_trip()
  
Lukasz Luba April 14, 2023, 2:18 p.m. UTC | #9
On 4/14/23 12:12, Daniel Lezcano wrote:
> On 13/04/2023 10:40, Di Shen wrote:
>> We have discussed this question in patch-v1:
>> https://lore.kernel.org/all/f6aaa5f1-495d-a158-14d8-ddb2bffbd9c2@arm.com/
>>
>> Simply put, we use the trip_temp in the Android System; set different
>> trip_temp for thermal control of different scenarios.
> 
> The changes are dealing with the trip points and trying to detect the 
> threshold. That part should be handled in the thermal core or thermal 
> trip side, not in the governor.
> 
> AFAICT, if a trip point is changed, then the power allocator should be 
> reset, including the cdev state.
> 
> It would be more convenient to add an ops to the governor ops structure 
> to reset the governor and then call it when a trip point is changed in 
> thermal_zone_set_trip()
> 
> 

Sounds reasonable to have a proper API and fwk handling this corner
case scenario.
Although, if there is a need for a 'easy-to-backport' fix for IPA only,
I agree with this patch, since it's straight forward to put in some
Android kernel. We can later fix the framework to handle this properly.
Anyway, both ways are OK to me.

Regards,
Lukasz
  
Daniel Lezcano April 14, 2023, 3:06 p.m. UTC | #10
On 14/04/2023 16:18, Lukasz Luba wrote:
> 
> 
> On 4/14/23 12:12, Daniel Lezcano wrote:
>> On 13/04/2023 10:40, Di Shen wrote:
>>> We have discussed this question in patch-v1:
>>> https://lore.kernel.org/all/f6aaa5f1-495d-a158-14d8-ddb2bffbd9c2@arm.com/
>>>
>>> Simply put, we use the trip_temp in the Android System; set different
>>> trip_temp for thermal control of different scenarios.
>>
>> The changes are dealing with the trip points and trying to detect the 
>> threshold. That part should be handled in the thermal core or thermal 
>> trip side, not in the governor.
>>
>> AFAICT, if a trip point is changed, then the power allocator should be 
>> reset, including the cdev state.
>>
>> It would be more convenient to add an ops to the governor ops 
>> structure to reset the governor and then call it when a trip point is 
>> changed in thermal_zone_set_trip()
>>
>>
> 
> Sounds reasonable to have a proper API and fwk handling this corner
> case scenario.
> Although, if there is a need for a 'easy-to-backport' fix for IPA only,
> I agree with this patch, since it's straight forward to put in some
> Android kernel. We can later fix the framework to handle this properly.
> Anyway, both ways are OK to me.

Unfortunately, we can not do the maintenance of the Linux kernel based 
on an 'easy-to-backport' policy to Android.

This patch could be applied from-list to Android as a hotfix. But for 
Linux the fix should be more elaborated. One solution is to add a 
'reset' ops and call it from the trip point update function.

Did you double check the issue is not impacting the other governors too ?
  
Lukasz Luba April 14, 2023, 3:21 p.m. UTC | #11
On 4/14/23 16:06, Daniel Lezcano wrote:
> On 14/04/2023 16:18, Lukasz Luba wrote:
>>
>>
>> On 4/14/23 12:12, Daniel Lezcano wrote:
>>> On 13/04/2023 10:40, Di Shen wrote:
>>>> We have discussed this question in patch-v1:
>>>> https://lore.kernel.org/all/f6aaa5f1-495d-a158-14d8-ddb2bffbd9c2@arm.com/
>>>>
>>>> Simply put, we use the trip_temp in the Android System; set different
>>>> trip_temp for thermal control of different scenarios.
>>>
>>> The changes are dealing with the trip points and trying to detect the 
>>> threshold. That part should be handled in the thermal core or thermal 
>>> trip side, not in the governor.
>>>
>>> AFAICT, if a trip point is changed, then the power allocator should 
>>> be reset, including the cdev state.
>>>
>>> It would be more convenient to add an ops to the governor ops 
>>> structure to reset the governor and then call it when a trip point is 
>>> changed in thermal_zone_set_trip()
>>>
>>>
>>
>> Sounds reasonable to have a proper API and fwk handling this corner
>> case scenario.
>> Although, if there is a need for a 'easy-to-backport' fix for IPA only,
>> I agree with this patch, since it's straight forward to put in some
>> Android kernel. We can later fix the framework to handle this properly.
>> Anyway, both ways are OK to me.
> 
> Unfortunately, we can not do the maintenance of the Linux kernel based 
> on an 'easy-to-backport' policy to Android.
> 
> This patch could be applied from-list to Android as a hotfix. But for 
> Linux the fix should be more elaborated. One solution is to add a 
> 'reset' ops and call it from the trip point update function.

Fair enough.

> 
> Did you double check the issue is not impacting the other governors too ?

No, unfortunately, I haven't checked other governors.
  
Di Shen May 25, 2023, 6:39 a.m. UTC | #12
On Fri, Apr 14, 2023 at 11:21 PM Lukasz Luba <lukasz.luba@arm.com> wrote:
>
>
>
> On 4/14/23 16:06, Daniel Lezcano wrote:
> > On 14/04/2023 16:18, Lukasz Luba wrote:
> >>
> >>
> >> On 4/14/23 12:12, Daniel Lezcano wrote:
> >>> On 13/04/2023 10:40, Di Shen wrote:
> >>>> We have discussed this question in patch-v1:
> >>>> https://lore.kernel.org/all/f6aaa5f1-495d-a158-14d8-ddb2bffbd9c2@arm.com/
> >>>>
> >>>> Simply put, we use the trip_temp in the Android System; set different
> >>>> trip_temp for thermal control of different scenarios.
> >>>
> >>> The changes are dealing with the trip points and trying to detect the
> >>> threshold. That part should be handled in the thermal core or thermal
> >>> trip side, not in the governor.
> >>>
> >>> AFAICT, if a trip point is changed, then the power allocator should
> >>> be reset, including the cdev state.
> >>>
> >>> It would be more convenient to add an ops to the governor ops
> >>> structure to reset the governor and then call it when a trip point is
> >>> changed in thermal_zone_set_trip()
> >>>
> >>>
> >>
> >> Sounds reasonable to have a proper API and fwk handling this corner
> >> case scenario.
> >> Although, if there is a need for a 'easy-to-backport' fix for IPA only,
> >> I agree with this patch, since it's straight forward to put in some
> >> Android kernel. We can later fix the framework to handle this properly.
> >> Anyway, both ways are OK to me.
> >
> > Unfortunately, we can not do the maintenance of the Linux kernel based
> > on an 'easy-to-backport' policy to Android.
> >
> > This patch could be applied from-list to Android as a hotfix. But for
> > Linux the fix should be more elaborated. One solution is to add a
> > 'reset' ops and call it from the trip point update function.
>
> Fair enough.
>
> >
> > Did you double check the issue is not impacting the other governors too ?
>
> No, unfortunately, I haven't checked other governors.

Hi Lukasz and Daniel,
I rethought about this issue, and have tried three ways to solve it.
Finally, I realized that the root cause might be the cdev->state
update and notify. We should get back to take cdev->state into
account.

The three ways:
1.From trips updating perspective:
As your suggestion,add an ops function for thermal_governor
structure,define it in IPA governor, and call it when trips are
changed by userspace(sysfs node).

2.From cdev->state updating perspective:
For example, for gov_power_allocator there are two branches reached to
__thermal_cdev_update.

power_allocator_trottle
        |
allow_maximum_power()[gov_power_allocator.c]
        |
__thermal_cdev_update()[thermal_helpers.c]<<<<<<<(1)
        |
thermal_cdev_set_cur_state()
        |
cdev->ops->set_cur_state()
        |
thermal_notify_cdev_state_update()
        |
     .......


power_allocator_throttle()[gov_power_allocator.c]
        |
allocate_power()
        |
power_actor_set_power()
        |
__thermal_cdev_update()[thermal_helpers.c]<<<<<<(2)
        |
      ......

Add a variable last_state for thermal_cooling_device structure to
record the last target cooling state, and before
thermal_notify_cdev_state_update, determine whether the last_state is
equal to current state. If not equal, then
thermal_notify_cdev_state_update.

static void thermal_cdev_set_cur_state(struct thermal_cooling_device
*cdev,
~                                        unsigned long target)
{
        if (cdev->ops->set_cur_state(cdev, target))
                return;

~       if (cdev->last_state != target)
+               thermal_notify_cdev_state_update(cdev->id, target);
+
+       cdev->last_state = target;
+
        thermal_cooling_device_stats_update(cdev, target);
}

In this way, it will only update and notify when the state is changed
which means there's no need to use update flag to make sure it updates
cdev->state only once.

It can avoid a lot of unnecessary notifications, not only when the
temperature drops below the first trip point(at this situation
cdev->state is always 0) but also when the policy is working.

3.Similar to the second method, but an easier one.
Actually, in the set_cur_state ops of every cooling device, it has
already checked whether the last cooling state is equal to current
value or not, and returns 0. Like cpufreq cooling device:
static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
                                unsigned long state)
{
        //.......
        /* Request state should be less than max_level */
        if (state > cpufreq_cdev->max_level)
                return -EINVAL;

        /* Check if the old cooling action is same as new cooling
action */
        if (cpufreq_cdev->cpufreq_state == state)
                return 0; //return -EAGAIN;
}

What if return a non-zero value? 1 or -EAGAIN(means thy again)? Then
thermal_cdev_set_cur_state() in __thermal_cdev_update() can return
directly without update and notify.

I prefer method 3. Because there's no more new variable or function
compared to 1 and 2, and it can make code more brief.

Well, what do you think about the three ways? Look forward to your
comments. Thank you!

Best regards,
Di
  

Patch

diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c
index 0eaf1527d3e3..c9e1f3b15f15 100644
--- a/drivers/thermal/gov_power_allocator.c
+++ b/drivers/thermal/gov_power_allocator.c
@@ -59,6 +59,8 @@  static inline s64 div_frac(s64 x, s64 y)
  *			governor switches on when this trip point is crossed.
  *			If the thermal zone only has one passive trip point,
  *			@trip_switch_on should be INVALID_TRIP.
+ * @last_switch_on_temp:Record the last switch_on_temp only when trips
+			are writable.
  * @trip_max_desired_temperature:	last passive trip point of the thermal
  *					zone.  The temperature we are
  *					controlling for.
@@ -70,6 +72,9 @@  struct power_allocator_params {
 	s64 err_integral;
 	s32 prev_err;
 	int trip_switch_on;
+#ifdef CONFIG_THERMAL_WRITABLE_TRIPS
+	int last_switch_on_temp;
+#endif
 	int trip_max_desired_temperature;
 	u32 sustainable_power;
 };
@@ -554,6 +559,25 @@  static void get_governor_trips(struct thermal_zone_device *tz,
 	}
 }
 
+#ifdef CONFIG_THERMAL_WRITABLE_TRIPS
+static bool thermal_cdev_needs_update(struct thermal_zone_device *tz, int switch_on_temp)
+{
+	bool update;
+	struct power_allocator_params *params = tz->governor_data;
+	int last_switch_on_temp = params->last_switch_on_temp;
+
+	update = (tz->last_temperature >= last_switch_on_temp);
+	params->last_switch_on_temp = switch_on_temp;
+
+	return update;
+}
+#else
+static inline bool thermal_cdev_needs_update(struct thermal_zone_device *tz, int switch_on_temp)
+{
+	return false;
+}
+#endif
+
 static void reset_pid_controller(struct power_allocator_params *params)
 {
 	params->err_integral = 0;
@@ -709,12 +733,15 @@  static int power_allocator_throttle(struct thermal_zone_device *tz, int trip_id)
 		return 0;
 
 	ret = __thermal_zone_get_trip(tz, params->trip_switch_on, &trip);
-	if (!ret && (tz->temperature < trip.temperature)) {
-		update = (tz->last_temperature >= trip.temperature);
-		tz->passive = 0;
-		reset_pid_controller(params);
-		allow_maximum_power(tz, update);
-		return 0;
+	if (!ret) {
+		update = thermal_cdev_needs_update(tz, trip.temperature);
+		if (tz->temperature < trip.temperature) {
+			update |= (tz->last_temperature >= trip.temperature);
+			tz->passive = 0;
+			reset_pid_controller(params);
+			allow_maximum_power(tz, update);
+			return 0;
+		}
 	}
 
 	tz->passive = 1;