thermal/core: update cooling device during thermal zone unregistration

Message ID 20230321154627.17787-1-rui.zhang@intel.com
State New
Headers
Series thermal/core: update cooling device during thermal zone unregistration |

Commit Message

Zhang, Rui March 21, 2023, 3:46 p.m. UTC
  When unregistering a thermal zone device, update the cooling device in
case the cooling device is activated by the current thermal zone.

This fixes a problem that the frequency of ACPI processors are still
limited after unloading ACPI thermal driver while ACPI passive cooling
is activated.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/thermal/thermal_core.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)
  

Comments

Rafael J. Wysocki March 21, 2023, 7:43 p.m. UTC | #1
On Tue, Mar 21, 2023 at 4:46 PM Zhang Rui <rui.zhang@intel.com> wrote:
>
> When unregistering a thermal zone device, update the cooling device in
> case the cooling device is activated by the current thermal zone.

I think that all cooling devices bound to the thermal zone being
removed need to be updated at this point?  Which is what the patch
really does IIUC.

It also avoids unbinding cooling devices that have not been bound to that zone.

> This fixes a problem that the frequency of ACPI processors are still
> limited after unloading ACPI thermal driver while ACPI passive cooling
> is activated.
>

Cc: stable@vger ?

> Signed-off-by: Zhang Rui <rui.zhang@intel.com>
> ---
>  drivers/thermal/thermal_core.c | 26 +++++++++++++++++++++++++-
>  1 file changed, 25 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> index cfd4c1afeae7..9f120e3c9bf0 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -1477,6 +1477,7 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz)
>         const struct thermal_zone_params *tzp;
>         struct thermal_cooling_device *cdev;
>         struct thermal_zone_device *pos = NULL;
> +       struct thermal_instance *ti;
>
>         if (!tz)
>                 return;
> @@ -1497,9 +1498,22 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz)
>

I would rearrange the code as follows.

>         /* Unbind all cdevs associated with 'this' thermal zone */
>         list_for_each_entry(cdev, &thermal_cdev_list, node) {
                    struct thermal_instance *ti;

> +               mutex_lock(&tz->lock);
> +               list_for_each_entry(ti, &tz->thermal_instances, tz_node) {

                            if (ti->cdev == cdev) {
                                    mutex_unlock(&tz->lock);
                                    goto unbind;
                            }
                    }
                    /* The cooling device is not used by this thermal zone. */
                    mutex_unlock(&tz->lock);
                    continue;

unbind:

>                 if (tz->ops->unbind) {
>                         tz->ops->unbind(tz, cdev);

                            /*
                             * The thermal instance for current
thermal zone has been
                             * removed, so update the cooling device
in case it has been
                             * activated by the thermal zone device going away.
                             */
                            mutex_lock(&cdev->lock);
                            __thermal_cdev_update(cdev);
                            mutex_unlock(&cdev->lock);

                            continue;
>                 }
>
>                 if (!tzp || !tzp->tbp)
  
Zhang, Rui March 22, 2023, 6 a.m. UTC | #2
On Tue, 2023-03-21 at 20:43 +0100, Rafael J. Wysocki wrote:
> On Tue, Mar 21, 2023 at 4:46 PM Zhang Rui <rui.zhang@intel.com>
> wrote:
> > When unregistering a thermal zone device, update the cooling device
> > in
> > case the cooling device is activated by the current thermal zone.
> 
> I think that all cooling devices bound to the thermal zone being
> removed need to be updated at this point?  Which is what the patch
> really does IIUC.

yes, that is what I want to say.

> 
> It also avoids unbinding cooling devices that have not been bound to
> that zone.
> 
The thermal zone device driver' .unbind() callback should be able to
handle this. But still, this is a valid improvement.

> > This fixes a problem that the frequency of ACPI processors are
> > still
> > limited after unloading ACPI thermal driver while ACPI passive
> > cooling
> > is activated.
> > 
> 
> Cc: stable@vger ?

yeah, will add it.
> 
> > Signed-off-by: Zhang Rui <rui.zhang@intel.com>
> > ---
> >  drivers/thermal/thermal_core.c | 26 +++++++++++++++++++++++++-
> >  1 file changed, 25 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/thermal/thermal_core.c
> > b/drivers/thermal/thermal_core.c
> > index cfd4c1afeae7..9f120e3c9bf0 100644
> > --- a/drivers/thermal/thermal_core.c
> > +++ b/drivers/thermal/thermal_core.c
> > @@ -1477,6 +1477,7 @@ void thermal_zone_device_unregister(struct
> > thermal_zone_device *tz)
> >         const struct thermal_zone_params *tzp;
> >         struct thermal_cooling_device *cdev;
> >         struct thermal_zone_device *pos = NULL;
> > +       struct thermal_instance *ti;
> > 
> >         if (!tz)
> >                 return;
> > @@ -1497,9 +1498,22 @@ void thermal_zone_device_unregister(struct
> > thermal_zone_device *tz)
> > 
> 
> I would rearrange the code as follows.
> 
> >         /* Unbind all cdevs associated with 'this' thermal zone */
> >         list_for_each_entry(cdev, &thermal_cdev_list, node) {
>                     struct thermal_instance *ti;
> 
> > +               mutex_lock(&tz->lock);
> > +               list_for_each_entry(ti, &tz->thermal_instances,
> > tz_node) {
> 
>                             if (ti->cdev == cdev) {
>                                     mutex_unlock(&tz->lock);
>                                     goto unbind;
>                             }
>                     }
>                     /* The cooling device is not used by this thermal
> zone. */
>                     mutex_unlock(&tz->lock);
>                     continue;
> 
> unbind:
> 
> >                 if (tz->ops->unbind) {
> >                         tz->ops->unbind(tz, cdev);
> 
>                             /*
>                              * The thermal instance for current
> thermal zone has been
>                              * removed, so update the cooling device
> in case it has been
>                              * activated by the thermal zone device
> going away.
>                              */
>                             mutex_lock(&cdev->lock);
>                             __thermal_cdev_update(cdev);
>                             mutex_unlock(&cdev->lock);
> 
>                             continue;
> >                 }

IMO, updating the cooling device is required, no matter the cooling
device is bound by .bind() callback or by static thermal_bind_params
structure.

Actually, I think struct thermal_bind_params is obsoleted and nobody is
using it now. I will write a cleanup patch to remove it after this one.

thanks,
rui
> > 
> >                 if (!tzp || !tzp->tbp)
  
Dan Carpenter March 22, 2023, 8:07 a.m. UTC | #3
Hi Zhang,

https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Zhang-Rui/thermal-core-update-cooling-device-during-thermal-zone-unregistration/20230321-234754
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git thermal
patch link:    https://lore.kernel.org/r/20230321154627.17787-1-rui.zhang%40intel.com
patch subject: [PATCH] thermal/core: update cooling device during thermal zone unregistration
config: i386-randconfig-m021 (https://download.01.org/0day-ci/archive/20230322/202303221159.6cdRxcUo-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Reported-by: Dan Carpenter <error27@gmail.com>
| Link: https://lore.kernel.org/r/202303221159.6cdRxcUo-lkp@intel.com/

smatch warnings:
drivers/thermal/thermal_core.c:1436 thermal_zone_device_unregister() warn: iterator used outside loop: 'ti'

vim +/ti +1436 drivers/thermal/thermal_core.c

203d3d4aa48233 drivers/thermal/thermal.c      Zhang Rui        2008-01-17  1402  void thermal_zone_device_unregister(struct thermal_zone_device *tz)
203d3d4aa48233 drivers/thermal/thermal.c      Zhang Rui        2008-01-17  1403  {
a5f785ce608caf drivers/thermal/thermal_core.c Dmitry Osipenko  2020-08-18  1404  	int i, tz_id;
7e8ee1e9d7561f drivers/thermal/thermal_sys.c  Durgadoss R      2012-09-18  1405  	const struct thermal_zone_params *tzp;
203d3d4aa48233 drivers/thermal/thermal.c      Zhang Rui        2008-01-17  1406  	struct thermal_cooling_device *cdev;
203d3d4aa48233 drivers/thermal/thermal.c      Zhang Rui        2008-01-17  1407  	struct thermal_zone_device *pos = NULL;
55a1117eebc62b drivers/thermal/thermal_core.c Zhang Rui        2023-03-21  1408  	struct thermal_instance *ti;
203d3d4aa48233 drivers/thermal/thermal.c      Zhang Rui        2008-01-17  1409  
203d3d4aa48233 drivers/thermal/thermal.c      Zhang Rui        2008-01-17  1410  	if (!tz)
203d3d4aa48233 drivers/thermal/thermal.c      Zhang Rui        2008-01-17  1411  		return;
203d3d4aa48233 drivers/thermal/thermal.c      Zhang Rui        2008-01-17  1412  
7e8ee1e9d7561f drivers/thermal/thermal_sys.c  Durgadoss R      2012-09-18  1413  	tzp = tz->tzp;
a5f785ce608caf drivers/thermal/thermal_core.c Dmitry Osipenko  2020-08-18  1414  	tz_id = tz->id;
7e8ee1e9d7561f drivers/thermal/thermal_sys.c  Durgadoss R      2012-09-18  1415  
203d3d4aa48233 drivers/thermal/thermal.c      Zhang Rui        2008-01-17  1416  	mutex_lock(&thermal_list_lock);
203d3d4aa48233 drivers/thermal/thermal.c      Zhang Rui        2008-01-17  1417  	list_for_each_entry(pos, &thermal_tz_list, node)
203d3d4aa48233 drivers/thermal/thermal.c      Zhang Rui        2008-01-17  1418  		if (pos == tz)
203d3d4aa48233 drivers/thermal/thermal.c      Zhang Rui        2008-01-17  1419  			break;
203d3d4aa48233 drivers/thermal/thermal.c      Zhang Rui        2008-01-17  1420  	if (pos != tz) {
203d3d4aa48233 drivers/thermal/thermal.c      Zhang Rui        2008-01-17  1421  		/* thermal zone device not found */
203d3d4aa48233 drivers/thermal/thermal.c      Zhang Rui        2008-01-17  1422  		mutex_unlock(&thermal_list_lock);
203d3d4aa48233 drivers/thermal/thermal.c      Zhang Rui        2008-01-17  1423  		return;
203d3d4aa48233 drivers/thermal/thermal.c      Zhang Rui        2008-01-17  1424  	}
203d3d4aa48233 drivers/thermal/thermal.c      Zhang Rui        2008-01-17  1425  	list_del(&tz->node);
7e8ee1e9d7561f drivers/thermal/thermal_sys.c  Durgadoss R      2012-09-18  1426  
7e8ee1e9d7561f drivers/thermal/thermal_sys.c  Durgadoss R      2012-09-18  1427  	/* Unbind all cdevs associated with 'this' thermal zone */
7e8ee1e9d7561f drivers/thermal/thermal_sys.c  Durgadoss R      2012-09-18  1428  	list_for_each_entry(cdev, &thermal_cdev_list, node) {
55a1117eebc62b drivers/thermal/thermal_core.c Zhang Rui        2023-03-21  1429  		mutex_lock(&tz->lock);
55a1117eebc62b drivers/thermal/thermal_core.c Zhang Rui        2023-03-21  1430  		list_for_each_entry(ti, &tz->thermal_instances, tz_node) {
55a1117eebc62b drivers/thermal/thermal_core.c Zhang Rui        2023-03-21  1431  			if (ti->cdev == cdev)
55a1117eebc62b drivers/thermal/thermal_core.c Zhang Rui        2023-03-21  1432  				break;
55a1117eebc62b drivers/thermal/thermal_core.c Zhang Rui        2023-03-21  1433  		}
55a1117eebc62b drivers/thermal/thermal_core.c Zhang Rui        2023-03-21  1434  
55a1117eebc62b drivers/thermal/thermal_core.c Zhang Rui        2023-03-21  1435  		/* The cooling device is not used by current thermal zone */
55a1117eebc62b drivers/thermal/thermal_core.c Zhang Rui        2023-03-21 @1436  		if (ti->cdev != cdev) {

Here we are testing to see if the loop exited by hitting the break.  If
the condition is != then "ti" is not a valid pointer and it's kind of an
out of bounds access.  I used to fix these with:

-	if (ti->cdev != cdev) {
+	if (list_entry_is_head(ti, &tz->thermal_instances, tz_node)) {

But these days we just prefer using a found variable:

	found = false;
	list_for_each_entry(ti, &tz->thermal_instances, tz_node) {
		if (ti->cdev == cdev) {
			found = true;
			break;
		}
	}
	if (!found) {

55a1117eebc62b drivers/thermal/thermal_core.c Zhang Rui        2023-03-21  1437  			mutex_unlock(&tz->lock);
55a1117eebc62b drivers/thermal/thermal_core.c Zhang Rui        2023-03-21  1438  			continue;
55a1117eebc62b drivers/thermal/thermal_core.c Zhang Rui        2023-03-21  1439  		}
55a1117eebc62b drivers/thermal/thermal_core.c Zhang Rui        2023-03-21  1440  		mutex_unlock(&tz->lock);
55a1117eebc62b drivers/thermal/thermal_core.c Zhang Rui        2023-03-21  1441  
7e8ee1e9d7561f drivers/thermal/thermal_sys.c  Durgadoss R      2012-09-18  1442  		if (tz->ops->unbind) {
203d3d4aa48233 drivers/thermal/thermal.c      Zhang Rui        2008-01-17  1443  			tz->ops->unbind(tz, cdev);
  

Patch

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index cfd4c1afeae7..9f120e3c9bf0 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -1477,6 +1477,7 @@  void thermal_zone_device_unregister(struct thermal_zone_device *tz)
 	const struct thermal_zone_params *tzp;
 	struct thermal_cooling_device *cdev;
 	struct thermal_zone_device *pos = NULL;
+	struct thermal_instance *ti;
 
 	if (!tz)
 		return;
@@ -1497,9 +1498,22 @@  void thermal_zone_device_unregister(struct thermal_zone_device *tz)
 
 	/* Unbind all cdevs associated with 'this' thermal zone */
 	list_for_each_entry(cdev, &thermal_cdev_list, node) {
+		mutex_lock(&tz->lock);
+		list_for_each_entry(ti, &tz->thermal_instances, tz_node) {
+			if (ti->cdev == cdev)
+				break;
+		}
+
+		/* The cooling device is not used by current thermal zone */
+		if (ti->cdev != cdev) {
+			mutex_unlock(&tz->lock);
+			continue;
+		}
+		mutex_unlock(&tz->lock);
+
 		if (tz->ops->unbind) {
 			tz->ops->unbind(tz, cdev);
-			continue;
+			goto deactivate_cdev;
 		}
 
 		if (!tzp || !tzp->tbp)
@@ -1511,6 +1525,16 @@  void thermal_zone_device_unregister(struct thermal_zone_device *tz)
 				tzp->tbp[i].cdev = NULL;
 			}
 		}
+
+deactivate_cdev:
+		/*
+		 * The thermal instances for current thermal zone has been
+		 * removed. Update the cooling device in case it is activated
+		 * by current thermal zone device.
+		 */
+		mutex_lock(&cdev->lock);
+		__thermal_cdev_update(cdev);
+		mutex_unlock(&cdev->lock);
 	}
 
 	mutex_unlock(&thermal_list_lock);