thermal/intel: Fix intel_tcc_get_temp() to support negative CPU temperature

Message ID 20240206015409.619127-1-rui.zhang@intel.com
State New
Headers
Series thermal/intel: Fix intel_tcc_get_temp() to support negative CPU temperature |

Commit Message

Zhang, Rui Feb. 6, 2024, 1:54 a.m. UTC
  CPU temperature can be negative in some cases. Thus the negative CPU
temperature should not be considered as a failure.

Fix intel_tcc_get_temp() and its users to support negative CPU
temperature.

Fixes: a3c1f066e1c5 ("thermal/intel: Introduce Intel TCC library")
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 .../intel/int340x_thermal/processor_thermal_device.c |  8 ++++----
 drivers/thermal/intel/intel_tcc.c                    | 12 ++++++------
 drivers/thermal/intel/x86_pkg_temp_thermal.c         |  8 ++++----
 include/linux/intel_tcc.h                            |  2 +-
 4 files changed, 15 insertions(+), 15 deletions(-)
  

Comments

Stanislaw Gruszka Feb. 6, 2024, 3:52 p.m. UTC | #1
On Tue, Feb 06, 2024 at 09:54:09AM +0800, Zhang Rui wrote:
> CPU temperature can be negative in some cases. Thus the negative CPU
> temperature should not be considered as a failure.
> 
> Fix intel_tcc_get_temp() and its users to support negative CPU
> temperature.
> 
> Fixes: a3c1f066e1c5 ("thermal/intel: Introduce Intel TCC library")
> Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>

> ---
>  .../intel/int340x_thermal/processor_thermal_device.c |  8 ++++----
>  drivers/thermal/intel/intel_tcc.c                    | 12 ++++++------
>  drivers/thermal/intel/x86_pkg_temp_thermal.c         |  8 ++++----
>  include/linux/intel_tcc.h                            |  2 +-
>  4 files changed, 15 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
> index 649f67fdf345..d75fae7b7ed2 100644
> --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
> +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
> @@ -176,14 +176,14 @@ static int proc_thermal_get_zone_temp(struct thermal_zone_device *zone,
>  					 int *temp)
>  {
>  	int cpu;
> -	int curr_temp;
> +	int curr_temp, ret;
>  
>  	*temp = 0;
>  
>  	for_each_online_cpu(cpu) {
> -		curr_temp = intel_tcc_get_temp(cpu, false);
> -		if (curr_temp < 0)
> -			return curr_temp;
> +		ret = intel_tcc_get_temp(cpu, &curr_temp, false);
> +		if (ret < 0)
> +			return ret;
>  		if (!*temp || curr_temp > *temp)
>  			*temp = curr_temp;
>  	}
> diff --git a/drivers/thermal/intel/intel_tcc.c b/drivers/thermal/intel/intel_tcc.c
> index 2e5c741c41ca..5e8b7f34b395 100644
> --- a/drivers/thermal/intel/intel_tcc.c
> +++ b/drivers/thermal/intel/intel_tcc.c
> @@ -103,18 +103,19 @@ EXPORT_SYMBOL_NS_GPL(intel_tcc_set_offset, INTEL_TCC);
>  /**
>   * intel_tcc_get_temp() - returns the current temperature
>   * @cpu: cpu that the MSR should be run on, nagative value means any cpu.
> + * @temp: pointer to the memory for saving cpu temperature.
>   * @pkg: true: Package Thermal Sensor. false: Core Thermal Sensor.
>   *
>   * Get the current temperature returned by the CPU core/package level
>   * thermal sensor, in degrees C.
>   *
> - * Return: Temperature in degrees C on success, negative error code otherwise.
> + * Return: 0 on success, negative error code otherwise.
>   */
> -int intel_tcc_get_temp(int cpu, bool pkg)
> +int intel_tcc_get_temp(int cpu, int *temp, bool pkg)
>  {
>  	u32 low, high;
>  	u32 msr = pkg ? MSR_IA32_PACKAGE_THERM_STATUS : MSR_IA32_THERM_STATUS;
> -	int tjmax, temp, err;
> +	int tjmax, err;
>  
>  	tjmax = intel_tcc_get_tjmax(cpu);
>  	if (tjmax < 0)
> @@ -131,9 +132,8 @@ int intel_tcc_get_temp(int cpu, bool pkg)
>  	if (!(low & BIT(31)))
>  		return -ENODATA;
>  
> -	temp = tjmax - ((low >> 16) & 0x7f);
> +	*temp = tjmax - ((low >> 16) & 0x7f);
>  
> -	/* Do not allow negative CPU temperature */
> -	return temp >= 0 ? temp : -ENODATA;
> +	return 0;
>  }
>  EXPORT_SYMBOL_NS_GPL(intel_tcc_get_temp, INTEL_TCC);
> diff --git a/drivers/thermal/intel/x86_pkg_temp_thermal.c b/drivers/thermal/intel/x86_pkg_temp_thermal.c
> index 11a7f8108bbb..61c3d450ee60 100644
> --- a/drivers/thermal/intel/x86_pkg_temp_thermal.c
> +++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c
> @@ -108,11 +108,11 @@ static struct zone_device *pkg_temp_thermal_get_dev(unsigned int cpu)
>  static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp)
>  {
>  	struct zone_device *zonedev = thermal_zone_device_priv(tzd);
> -	int val;
> +	int val, ret;
>  
> -	val = intel_tcc_get_temp(zonedev->cpu, true);
> -	if (val < 0)
> -		return val;
> +	ret = intel_tcc_get_temp(zonedev->cpu, &val, true);
> +	if (ret < 0)
> +		return ret;
>  
>  	*temp = val * 1000;
>  	pr_debug("sys_get_curr_temp %d\n", *temp);
> diff --git a/include/linux/intel_tcc.h b/include/linux/intel_tcc.h
> index f422612c28d6..8ff8eabb4a98 100644
> --- a/include/linux/intel_tcc.h
> +++ b/include/linux/intel_tcc.h
> @@ -13,6 +13,6 @@
>  int intel_tcc_get_tjmax(int cpu);
>  int intel_tcc_get_offset(int cpu);
>  int intel_tcc_set_offset(int cpu, int offset);
> -int intel_tcc_get_temp(int cpu, bool pkg);
> +int intel_tcc_get_temp(int cpu, int *temp, bool pkg);
>  
>  #endif /* __INTEL_TCC_H__ */
> -- 
> 2.34.1
> 
>
  
Rafael J. Wysocki Feb. 12, 2024, 5:43 p.m. UTC | #2
On Tue, Feb 6, 2024 at 4:53 PM Stanislaw Gruszka
<stanislaw.gruszka@linux.intel.com> wrote:
>
> On Tue, Feb 06, 2024 at 09:54:09AM +0800, Zhang Rui wrote:
> > CPU temperature can be negative in some cases. Thus the negative CPU
> > temperature should not be considered as a failure.
> >
> > Fix intel_tcc_get_temp() and its users to support negative CPU
> > temperature.
> >
> > Fixes: a3c1f066e1c5 ("thermal/intel: Introduce Intel TCC library")
> > Signed-off-by: Zhang Rui <rui.zhang@intel.com>
> Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
>
> > ---
> >  .../intel/int340x_thermal/processor_thermal_device.c |  8 ++++----
> >  drivers/thermal/intel/intel_tcc.c                    | 12 ++++++------
> >  drivers/thermal/intel/x86_pkg_temp_thermal.c         |  8 ++++----
> >  include/linux/intel_tcc.h                            |  2 +-
> >  4 files changed, 15 insertions(+), 15 deletions(-)
> >
> > diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
> > index 649f67fdf345..d75fae7b7ed2 100644
> > --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
> > +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
> > @@ -176,14 +176,14 @@ static int proc_thermal_get_zone_temp(struct thermal_zone_device *zone,
> >                                        int *temp)
> >  {
> >       int cpu;
> > -     int curr_temp;
> > +     int curr_temp, ret;
> >
> >       *temp = 0;
> >
> >       for_each_online_cpu(cpu) {
> > -             curr_temp = intel_tcc_get_temp(cpu, false);
> > -             if (curr_temp < 0)
> > -                     return curr_temp;
> > +             ret = intel_tcc_get_temp(cpu, &curr_temp, false);
> > +             if (ret < 0)
> > +                     return ret;
> >               if (!*temp || curr_temp > *temp)
> >                       *temp = curr_temp;
> >       }
> > diff --git a/drivers/thermal/intel/intel_tcc.c b/drivers/thermal/intel/intel_tcc.c
> > index 2e5c741c41ca..5e8b7f34b395 100644
> > --- a/drivers/thermal/intel/intel_tcc.c
> > +++ b/drivers/thermal/intel/intel_tcc.c
> > @@ -103,18 +103,19 @@ EXPORT_SYMBOL_NS_GPL(intel_tcc_set_offset, INTEL_TCC);
> >  /**
> >   * intel_tcc_get_temp() - returns the current temperature
> >   * @cpu: cpu that the MSR should be run on, nagative value means any cpu.
> > + * @temp: pointer to the memory for saving cpu temperature.
> >   * @pkg: true: Package Thermal Sensor. false: Core Thermal Sensor.
> >   *
> >   * Get the current temperature returned by the CPU core/package level
> >   * thermal sensor, in degrees C.
> >   *
> > - * Return: Temperature in degrees C on success, negative error code otherwise.
> > + * Return: 0 on success, negative error code otherwise.
> >   */
> > -int intel_tcc_get_temp(int cpu, bool pkg)
> > +int intel_tcc_get_temp(int cpu, int *temp, bool pkg)
> >  {
> >       u32 low, high;
> >       u32 msr = pkg ? MSR_IA32_PACKAGE_THERM_STATUS : MSR_IA32_THERM_STATUS;
> > -     int tjmax, temp, err;
> > +     int tjmax, err;
> >
> >       tjmax = intel_tcc_get_tjmax(cpu);
> >       if (tjmax < 0)
> > @@ -131,9 +132,8 @@ int intel_tcc_get_temp(int cpu, bool pkg)
> >       if (!(low & BIT(31)))
> >               return -ENODATA;
> >
> > -     temp = tjmax - ((low >> 16) & 0x7f);
> > +     *temp = tjmax - ((low >> 16) & 0x7f);
> >
> > -     /* Do not allow negative CPU temperature */
> > -     return temp >= 0 ? temp : -ENODATA;
> > +     return 0;
> >  }
> >  EXPORT_SYMBOL_NS_GPL(intel_tcc_get_temp, INTEL_TCC);
> > diff --git a/drivers/thermal/intel/x86_pkg_temp_thermal.c b/drivers/thermal/intel/x86_pkg_temp_thermal.c
> > index 11a7f8108bbb..61c3d450ee60 100644
> > --- a/drivers/thermal/intel/x86_pkg_temp_thermal.c
> > +++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c
> > @@ -108,11 +108,11 @@ static struct zone_device *pkg_temp_thermal_get_dev(unsigned int cpu)
> >  static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp)
> >  {
> >       struct zone_device *zonedev = thermal_zone_device_priv(tzd);
> > -     int val;
> > +     int val, ret;
> >
> > -     val = intel_tcc_get_temp(zonedev->cpu, true);
> > -     if (val < 0)
> > -             return val;
> > +     ret = intel_tcc_get_temp(zonedev->cpu, &val, true);
> > +     if (ret < 0)
> > +             return ret;
> >
> >       *temp = val * 1000;
> >       pr_debug("sys_get_curr_temp %d\n", *temp);
> > diff --git a/include/linux/intel_tcc.h b/include/linux/intel_tcc.h
> > index f422612c28d6..8ff8eabb4a98 100644
> > --- a/include/linux/intel_tcc.h
> > +++ b/include/linux/intel_tcc.h
> > @@ -13,6 +13,6 @@
> >  int intel_tcc_get_tjmax(int cpu);
> >  int intel_tcc_get_offset(int cpu);
> >  int intel_tcc_set_offset(int cpu, int offset);
> > -int intel_tcc_get_temp(int cpu, bool pkg);
> > +int intel_tcc_get_temp(int cpu, int *temp, bool pkg);
> >
> >  #endif /* __INTEL_TCC_H__ */
> > --

Applied as 6.9 material, thanks!
  

Patch

diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
index 649f67fdf345..d75fae7b7ed2 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
@@ -176,14 +176,14 @@  static int proc_thermal_get_zone_temp(struct thermal_zone_device *zone,
 					 int *temp)
 {
 	int cpu;
-	int curr_temp;
+	int curr_temp, ret;
 
 	*temp = 0;
 
 	for_each_online_cpu(cpu) {
-		curr_temp = intel_tcc_get_temp(cpu, false);
-		if (curr_temp < 0)
-			return curr_temp;
+		ret = intel_tcc_get_temp(cpu, &curr_temp, false);
+		if (ret < 0)
+			return ret;
 		if (!*temp || curr_temp > *temp)
 			*temp = curr_temp;
 	}
diff --git a/drivers/thermal/intel/intel_tcc.c b/drivers/thermal/intel/intel_tcc.c
index 2e5c741c41ca..5e8b7f34b395 100644
--- a/drivers/thermal/intel/intel_tcc.c
+++ b/drivers/thermal/intel/intel_tcc.c
@@ -103,18 +103,19 @@  EXPORT_SYMBOL_NS_GPL(intel_tcc_set_offset, INTEL_TCC);
 /**
  * intel_tcc_get_temp() - returns the current temperature
  * @cpu: cpu that the MSR should be run on, nagative value means any cpu.
+ * @temp: pointer to the memory for saving cpu temperature.
  * @pkg: true: Package Thermal Sensor. false: Core Thermal Sensor.
  *
  * Get the current temperature returned by the CPU core/package level
  * thermal sensor, in degrees C.
  *
- * Return: Temperature in degrees C on success, negative error code otherwise.
+ * Return: 0 on success, negative error code otherwise.
  */
-int intel_tcc_get_temp(int cpu, bool pkg)
+int intel_tcc_get_temp(int cpu, int *temp, bool pkg)
 {
 	u32 low, high;
 	u32 msr = pkg ? MSR_IA32_PACKAGE_THERM_STATUS : MSR_IA32_THERM_STATUS;
-	int tjmax, temp, err;
+	int tjmax, err;
 
 	tjmax = intel_tcc_get_tjmax(cpu);
 	if (tjmax < 0)
@@ -131,9 +132,8 @@  int intel_tcc_get_temp(int cpu, bool pkg)
 	if (!(low & BIT(31)))
 		return -ENODATA;
 
-	temp = tjmax - ((low >> 16) & 0x7f);
+	*temp = tjmax - ((low >> 16) & 0x7f);
 
-	/* Do not allow negative CPU temperature */
-	return temp >= 0 ? temp : -ENODATA;
+	return 0;
 }
 EXPORT_SYMBOL_NS_GPL(intel_tcc_get_temp, INTEL_TCC);
diff --git a/drivers/thermal/intel/x86_pkg_temp_thermal.c b/drivers/thermal/intel/x86_pkg_temp_thermal.c
index 11a7f8108bbb..61c3d450ee60 100644
--- a/drivers/thermal/intel/x86_pkg_temp_thermal.c
+++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c
@@ -108,11 +108,11 @@  static struct zone_device *pkg_temp_thermal_get_dev(unsigned int cpu)
 static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp)
 {
 	struct zone_device *zonedev = thermal_zone_device_priv(tzd);
-	int val;
+	int val, ret;
 
-	val = intel_tcc_get_temp(zonedev->cpu, true);
-	if (val < 0)
-		return val;
+	ret = intel_tcc_get_temp(zonedev->cpu, &val, true);
+	if (ret < 0)
+		return ret;
 
 	*temp = val * 1000;
 	pr_debug("sys_get_curr_temp %d\n", *temp);
diff --git a/include/linux/intel_tcc.h b/include/linux/intel_tcc.h
index f422612c28d6..8ff8eabb4a98 100644
--- a/include/linux/intel_tcc.h
+++ b/include/linux/intel_tcc.h
@@ -13,6 +13,6 @@ 
 int intel_tcc_get_tjmax(int cpu);
 int intel_tcc_get_offset(int cpu);
 int intel_tcc_set_offset(int cpu, int offset);
-int intel_tcc_get_temp(int cpu, bool pkg);
+int intel_tcc_get_temp(int cpu, int *temp, bool pkg);
 
 #endif /* __INTEL_TCC_H__ */