[1/3] thermal/drivers/intel: Use generic trip points for quark_dts

Message ID 20230118181622.33335-1-daniel.lezcano@linaro.org
State New
Headers
Series [1/3] thermal/drivers/intel: Use generic trip points for quark_dts |

Commit Message

Daniel Lezcano Jan. 18, 2023, 6:16 p.m. UTC
  The thermal framework gives the possibility to register the trip
points with the thermal zone. When that is done, no get_trip_* ops are
needed and they can be removed.

Convert ops content logic into generic trip points and register them with the
thermal zone.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 .../thermal/intel/intel_quark_dts_thermal.c   | 56 +++++++++----------
 1 file changed, 25 insertions(+), 31 deletions(-)
  

Comments

Rafael J. Wysocki Jan. 26, 2023, 2:15 p.m. UTC | #1
On Wed, Jan 18, 2023 at 7:16 PM Daniel Lezcano
<daniel.lezcano@linaro.org> wrote:
>
> The thermal framework gives the possibility to register the trip
> points with the thermal zone. When that is done, no get_trip_* ops are
> needed and they can be removed.
>
> Convert ops content logic into generic trip points and register them with the
> thermal zone.
>
> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
> ---
>  .../thermal/intel/intel_quark_dts_thermal.c   | 56 +++++++++----------
>  1 file changed, 25 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/thermal/intel/intel_quark_dts_thermal.c b/drivers/thermal/intel/intel_quark_dts_thermal.c
> index 3eafc6b0e6c3..4e1d1799ec22 100644
> --- a/drivers/thermal/intel/intel_quark_dts_thermal.c
> +++ b/drivers/thermal/intel/intel_quark_dts_thermal.c
> @@ -84,6 +84,7 @@
>  #define QRK_DTS_MASK_TP_THRES          0xFF
>  #define QRK_DTS_SHIFT_TP               8
>  #define QRK_DTS_ID_TP_CRITICAL         0
> +#define QRK_DTS_ID_TP_HOT              1
>  #define QRK_DTS_SAFE_TP_THRES          105
>
>  /* Thermal Sensor Register Lock */
> @@ -104,6 +105,7 @@ struct soc_sensor_entry {
>         u32 store_ptps;
>         u32 store_dts_enable;
>         struct thermal_zone_device *tzone;
> +       struct thermal_trip trips[QRK_MAX_DTS_TRIPS];
>  };
>
>  static struct soc_sensor_entry *soc_dts;
> @@ -172,7 +174,7 @@ static int soc_dts_disable(struct thermal_zone_device *tzd)
>         return ret;
>  }
>
> -static int _get_trip_temp(int trip, int *temp)
> +static int get_trip_temp(int trip, int *temp)
>  {
>         int status;
>         u32 out;
> @@ -197,17 +199,6 @@ static int _get_trip_temp(int trip, int *temp)
>         return 0;
>  }
>
> -static inline int sys_get_trip_temp(struct thermal_zone_device *tzd,
> -                               int trip, int *temp)
> -{
> -       return _get_trip_temp(trip, temp);
> -}
> -
> -static inline int sys_get_crit_temp(struct thermal_zone_device *tzd, int *temp)
> -{
> -       return _get_trip_temp(QRK_DTS_ID_TP_CRITICAL, temp);
> -}
> -
>  static int update_trip_temp(struct soc_sensor_entry *aux_entry,
>                                 int trip, int temp)
>  {
> @@ -262,17 +253,6 @@ static inline int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip,
>         return update_trip_temp(tzd->devdata, trip, temp);
>  }
>
> -static int sys_get_trip_type(struct thermal_zone_device *thermal,
> -               int trip, enum thermal_trip_type *type)
> -{
> -       if (trip)
> -               *type = THERMAL_TRIP_HOT;
> -       else
> -               *type = THERMAL_TRIP_CRITICAL;
> -
> -       return 0;
> -}
> -
>  static int sys_get_curr_temp(struct thermal_zone_device *tzd,
>                                 int *temp)
>  {
> @@ -315,10 +295,7 @@ static int sys_change_mode(struct thermal_zone_device *tzd,
>
>  static struct thermal_zone_device_ops tzone_ops = {
>         .get_temp = sys_get_curr_temp,
> -       .get_trip_temp = sys_get_trip_temp,
> -       .get_trip_type = sys_get_trip_type,
>         .set_trip_temp = sys_set_trip_temp,
> -       .get_crit_temp = sys_get_crit_temp,
>         .change_mode = sys_change_mode,
>  };
>
> @@ -344,7 +321,7 @@ static void free_soc_dts(struct soc_sensor_entry *aux_entry)
>  static struct soc_sensor_entry *alloc_soc_dts(void)
>  {
>         struct soc_sensor_entry *aux_entry;
> -       int err;
> +       int err, temperature;
>         u32 out;
>         int wr_mask;
>
> @@ -385,10 +362,27 @@ static struct soc_sensor_entry *alloc_soc_dts(void)
>                         goto err_ret;
>         }
>
> -       aux_entry->tzone = thermal_zone_device_register("quark_dts",
> -                       QRK_MAX_DTS_TRIPS,
> -                       wr_mask,
> -                       aux_entry, &tzone_ops, NULL, 0, polling_delay);
> +       err = get_trip_temp(QRK_DTS_ID_TP_CRITICAL, &temperature);
> +       if (err)
> +               goto err_ret;
> +
> +       aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].temperature = temperature;
> +       aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].type = THERMAL_TRIP_CRITICAL;
> +
> +       err = get_trip_temp(QRK_DTS_ID_TP_HOT, &temperature);
> +       if (err)
> +               goto err_ret;

If I'm not mistaken, this won't even try to register the thermal zone
if at least one trip cannot be initialized, but previously it was
registered in that case, but the trips that failed to respond were
disabled.

This is a change in behavior that would at least need to be documented
in the changelog, but it isn't.

I'm not even sure it is a safe change to make, however.

> +
> +       aux_entry->trips[QRK_DTS_ID_TP_HOT].temperature = temperature;
> +       aux_entry->trips[QRK_DTS_ID_TP_HOT].type = THERMAL_TRIP_HOT;
> +
> +       aux_entry->tzone =
> +               thermal_zone_device_register_with_trips("quark_dts",
> +                                                       aux_entry->trips,
> +                                                       QRK_MAX_DTS_TRIPS,
> +                                                       wr_mask,
> +                                                       aux_entry, &tzone_ops,
> +                                                       NULL, 0, polling_delay);
>         if (IS_ERR(aux_entry->tzone)) {
>                 err = PTR_ERR(aux_entry->tzone);
>                 goto err_ret;
> --
> 2.34.1
>
  
Daniel Lezcano Jan. 31, 2023, 4:41 p.m. UTC | #2
On 26/01/2023 15:15, Rafael J. Wysocki wrote:
> On Wed, Jan 18, 2023 at 7:16 PM Daniel Lezcano
> <daniel.lezcano@linaro.org> wrote:
>>
>> The thermal framework gives the possibility to register the trip
>> points with the thermal zone. When that is done, no get_trip_* ops are
>> needed and they can be removed.
>>
>> Convert ops content logic into generic trip points and register them with the
>> thermal zone.
>>
>> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
>> ---

[ ... ]

>> -       aux_entry->tzone = thermal_zone_device_register("quark_dts",
>> -                       QRK_MAX_DTS_TRIPS,
>> -                       wr_mask,
>> -                       aux_entry, &tzone_ops, NULL, 0, polling_delay);
>> +       err = get_trip_temp(QRK_DTS_ID_TP_CRITICAL, &temperature);
>> +       if (err)
>> +               goto err_ret;
>> +
>> +       aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].temperature = temperature;
>> +       aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].type = THERMAL_TRIP_CRITICAL;
>> +
>> +       err = get_trip_temp(QRK_DTS_ID_TP_HOT, &temperature);
>> +       if (err)
>> +               goto err_ret;
> 
> If I'm not mistaken, this won't even try to register the thermal zone
> if at least one trip cannot be initialized, but previously it was
> registered in that case, but the trips that failed to respond were
> disabled.
> 
> This is a change in behavior that would at least need to be documented
> in the changelog, but it isn't.
> 
> I'm not sure if it is safe to make even, however.

Thanks for catching this.

Two solutions:

1. Set the temperature to THERMAL_TEMP_INVALID and change 
get_thermal_trip() to return -EINVAL or -ERANGE if the temperature is 
THERMAL_TEMP_INVALID

2. Register only the valid trip points.

What would be the preferable way ?
  
Rafael J. Wysocki Jan. 31, 2023, 7:11 p.m. UTC | #3
On Tue, Jan 31, 2023 at 5:41 PM Daniel Lezcano
<daniel.lezcano@linaro.org> wrote:
>
> On 26/01/2023 15:15, Rafael J. Wysocki wrote:
> > On Wed, Jan 18, 2023 at 7:16 PM Daniel Lezcano
> > <daniel.lezcano@linaro.org> wrote:
> >>
> >> The thermal framework gives the possibility to register the trip
> >> points with the thermal zone. When that is done, no get_trip_* ops are
> >> needed and they can be removed.
> >>
> >> Convert ops content logic into generic trip points and register them with the
> >> thermal zone.
> >>
> >> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
> >> ---
>
> [ ... ]
>
> >> -       aux_entry->tzone = thermal_zone_device_register("quark_dts",
> >> -                       QRK_MAX_DTS_TRIPS,
> >> -                       wr_mask,
> >> -                       aux_entry, &tzone_ops, NULL, 0, polling_delay);
> >> +       err = get_trip_temp(QRK_DTS_ID_TP_CRITICAL, &temperature);
> >> +       if (err)
> >> +               goto err_ret;
> >> +
> >> +       aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].temperature = temperature;
> >> +       aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].type = THERMAL_TRIP_CRITICAL;
> >> +
> >> +       err = get_trip_temp(QRK_DTS_ID_TP_HOT, &temperature);
> >> +       if (err)
> >> +               goto err_ret;
> >
> > If I'm not mistaken, this won't even try to register the thermal zone
> > if at least one trip cannot be initialized, but previously it was
> > registered in that case, but the trips that failed to respond were
> > disabled.
> >
> > This is a change in behavior that would at least need to be documented
> > in the changelog, but it isn't.
> >
> > I'm not sure if it is safe to make even, however.
>
> Thanks for catching this.
>
> Two solutions:
>
> 1. Set the temperature to THERMAL_TEMP_INVALID and change
> get_thermal_trip() to return -EINVAL or -ERANGE if the temperature is
> THERMAL_TEMP_INVALID
>
> 2. Register only the valid trip points.
>
> What would be the preferable way ?

I think that the trip points that are registered currently need to
still be registered after the change.

Does registering a trip point with the temperature set to
THERMAL_TEMP_INVALID cause it to be effectively disabled?
  
Daniel Lezcano Jan. 31, 2023, 11:55 p.m. UTC | #4
On 31/01/2023 20:11, Rafael J. Wysocki wrote:
> On Tue, Jan 31, 2023 at 5:41 PM Daniel Lezcano
> <daniel.lezcano@linaro.org> wrote:
>>
>> On 26/01/2023 15:15, Rafael J. Wysocki wrote:
>>> On Wed, Jan 18, 2023 at 7:16 PM Daniel Lezcano
>>> <daniel.lezcano@linaro.org> wrote:
>>>>
>>>> The thermal framework gives the possibility to register the trip
>>>> points with the thermal zone. When that is done, no get_trip_* ops are
>>>> needed and they can be removed.
>>>>
>>>> Convert ops content logic into generic trip points and register them with the
>>>> thermal zone.
>>>>
>>>> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
>>>> ---
>>
>> [ ... ]
>>
>>>> -       aux_entry->tzone = thermal_zone_device_register("quark_dts",
>>>> -                       QRK_MAX_DTS_TRIPS,
>>>> -                       wr_mask,
>>>> -                       aux_entry, &tzone_ops, NULL, 0, polling_delay);
>>>> +       err = get_trip_temp(QRK_DTS_ID_TP_CRITICAL, &temperature);
>>>> +       if (err)
>>>> +               goto err_ret;
>>>> +
>>>> +       aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].temperature = temperature;
>>>> +       aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].type = THERMAL_TRIP_CRITICAL;
>>>> +
>>>> +       err = get_trip_temp(QRK_DTS_ID_TP_HOT, &temperature);
>>>> +       if (err)
>>>> +               goto err_ret;
>>>
>>> If I'm not mistaken, this won't even try to register the thermal zone
>>> if at least one trip cannot be initialized, but previously it was
>>> registered in that case, but the trips that failed to respond were
>>> disabled.
>>>
>>> This is a change in behavior that would at least need to be documented
>>> in the changelog, but it isn't.
>>>
>>> I'm not sure if it is safe to make even, however.
>>
>> Thanks for catching this.
>>
>> Two solutions:
>>
>> 1. Set the temperature to THERMAL_TEMP_INVALID and change
>> get_thermal_trip() to return -EINVAL or -ERANGE if the temperature is
>> THERMAL_TEMP_INVALID
>>
>> 2. Register only the valid trip points.
>>
>> What would be the preferable way ?
> 
> I think that the trip points that are registered currently need to
> still be registered after the change.
> 
> Does registering a trip point with the temperature set to
> THERMAL_TEMP_INVALID cause it to be effectively disabled?

No, but if we make thermal_zone_get_trip() return -EINVAL when
THERMAL_TEMP_INVALID is set for the specified trip id, then the
registration will set the disabled flag.

https://git.kernel.org/pub/scm/linux/kernel/git/thermal/linux.git/tree/drivers/thermal/thermal_core.c?h=thermal/bleeding-edge#n1395
  
Daniel Lezcano Feb. 1, 2023, 10:42 a.m. UTC | #5
On 31/01/2023 20:11, Rafael J. Wysocki wrote:
> On Tue, Jan 31, 2023 at 5:41 PM Daniel Lezcano
> <daniel.lezcano@linaro.org> wrote:
>>
>> On 26/01/2023 15:15, Rafael J. Wysocki wrote:
>>> On Wed, Jan 18, 2023 at 7:16 PM Daniel Lezcano
>>> <daniel.lezcano@linaro.org> wrote:
>>>>
>>>> The thermal framework gives the possibility to register the trip
>>>> points with the thermal zone. When that is done, no get_trip_* ops are
>>>> needed and they can be removed.
>>>>
>>>> Convert ops content logic into generic trip points and register them with the
>>>> thermal zone.
>>>>
>>>> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
>>>> ---
>>
>> [ ... ]
>>
>>>> -       aux_entry->tzone = thermal_zone_device_register("quark_dts",
>>>> -                       QRK_MAX_DTS_TRIPS,
>>>> -                       wr_mask,
>>>> -                       aux_entry, &tzone_ops, NULL, 0, polling_delay);
>>>> +       err = get_trip_temp(QRK_DTS_ID_TP_CRITICAL, &temperature);
>>>> +       if (err)
>>>> +               goto err_ret;
>>>> +
>>>> +       aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].temperature = temperature;
>>>> +       aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].type = THERMAL_TRIP_CRITICAL;
>>>> +
>>>> +       err = get_trip_temp(QRK_DTS_ID_TP_HOT, &temperature);
>>>> +       if (err)
>>>> +               goto err_ret;
>>>
>>> If I'm not mistaken, this won't even try to register the thermal zone
>>> if at least one trip cannot be initialized, but previously it was
>>> registered in that case, but the trips that failed to respond were
>>> disabled.
>>>
>>> This is a change in behavior that would at least need to be documented
>>> in the changelog, but it isn't.
>>>
>>> I'm not sure if it is safe to make even, however.
>>
>> Thanks for catching this.
>>
>> Two solutions:
>>
>> 1. Set the temperature to THERMAL_TEMP_INVALID and change
>> get_thermal_trip() to return -EINVAL or -ERANGE if the temperature is
>> THERMAL_TEMP_INVALID
>>
>> 2. Register only the valid trip points.
>>
>> What would be the preferable way ?
> 
> I think that the trip points that are registered currently need to
> still be registered after the change.
> 
> Does registering a trip point with the temperature set to
> THERMAL_TEMP_INVALID cause it to be effectively disabled?

The initial behavior before the changes is:

The function thermal_zone_device_register() will go through all the trip 
points and call thermal_zone_get_trip(), resulting in a call to 
ops->get_trip_temp(). If the call fails, the trip point is tagged as 
disabled and will stay in this state forever, so discarded in the trip 
point crossed detection.

That does not report an error, and the trip point is shown in sysfs but
in an inconsistent state, as it is actually disabled. Reading the trip
point may or may not return an error, but it is in any case disabled in the
thermal framework. Userspace has no information about the trip point
being disabled, so showing it regardless of its state is pointless and
likely to confuse userspace.

IMO, it would be saner to register only the trip points which are
actually valid, so that invalid trip points are not shown; that also
avoids extra complexity in the thermal core to handle them.
  
Rafael J. Wysocki Feb. 1, 2023, 6:47 p.m. UTC | #6
On Wed, Feb 1, 2023 at 11:42 AM Daniel Lezcano
<daniel.lezcano@linaro.org> wrote:
>
> On 31/01/2023 20:11, Rafael J. Wysocki wrote:
> > On Tue, Jan 31, 2023 at 5:41 PM Daniel Lezcano
> > <daniel.lezcano@linaro.org> wrote:
> >>
> >> On 26/01/2023 15:15, Rafael J. Wysocki wrote:
> >>> On Wed, Jan 18, 2023 at 7:16 PM Daniel Lezcano
> >>> <daniel.lezcano@linaro.org> wrote:
> >>>>
> >>>> The thermal framework gives the possibility to register the trip
> >>>> points with the thermal zone. When that is done, no get_trip_* ops are
> >>>> needed and they can be removed.
> >>>>
> >>>> Convert ops content logic into generic trip points and register them with the
> >>>> thermal zone.
> >>>>
> >>>> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
> >>>> ---
> >>
> >> [ ... ]
> >>
> >>>> -       aux_entry->tzone = thermal_zone_device_register("quark_dts",
> >>>> -                       QRK_MAX_DTS_TRIPS,
> >>>> -                       wr_mask,
> >>>> -                       aux_entry, &tzone_ops, NULL, 0, polling_delay);
> >>>> +       err = get_trip_temp(QRK_DTS_ID_TP_CRITICAL, &temperature);
> >>>> +       if (err)
> >>>> +               goto err_ret;
> >>>> +
> >>>> +       aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].temperature = temperature;
> >>>> +       aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].type = THERMAL_TRIP_CRITICAL;
> >>>> +
> >>>> +       err = get_trip_temp(QRK_DTS_ID_TP_HOT, &temperature);
> >>>> +       if (err)
> >>>> +               goto err_ret;
> >>>
> >>> If I'm not mistaken, this won't even try to register the thermal zone
> >>> if at least one trip cannot be initialized, but previously it was
> >>> registered in that case, but the trips that failed to respond were
> >>> disabled.
> >>>
> >>> This is a change in behavior that would at least need to be documented
> >>> in the changelog, but it isn't.
> >>>
> >>> I'm not sure if it is safe to make even, however.
> >>
> >> Thanks for catching this.
> >>
> >> Two solutions:
> >>
> >> 1. Set the temperature to THERMAL_TEMP_INVALID and change
> >> get_thermal_trip() to return -EINVAL or -ERANGE if the temperature is
> >> THERMAL_TEMP_INVALID
> >>
> >> 2. Register only the valid trip points.
> >>
> >> What would be the preferable way ?
> >
> > I think that the trip points that are registered currently need to
> > still be registered after the change.
> >
> > Does registering a trip point with the temperature set to
> > THERMAL_TEMP_INVALID cause it to be effectively disabled?
>
> The initial behavior before the changes is:
>
> The function thermal_zone_device_register() will go through all the trip
> points and call thermal_zone_get_trip(), resulting in a call to
> ops->get_trip_temp(). If the call fails, the trip point is tagged as
> disabled and will stay in this state forever, so discarded in the trip
> point crossed detection.
>
> That does not report an error and the trip point is showed in sysfs but
> in a inconsistent state as it is actually disabled. Reading the trip
> point will return an error or not, but it is in any case disabled in the
> thermal framework. The userspace does not have the information about the
> trip point being disabled, so showing it up regardless its state is
> pointless and prone to confusion for the userspace.
>
> IMO, it would be more sane to register the trip points which are
> actually valid, so invalid trip points are not showed up and does
> prevent extra complexity in the thermal core to handle them.

Except when the trip point can be updated to become a valid one later,
for example in response to a system configuration change.  That can
happen to ACPI-provided trip points, for example.

I don't think that this is an issue for this particular driver, but
the core needs to handle that case anyway.

Moreover, there is the case when trip points only become relevant when
their temperatures are set via ops->set_trip_temp() and they are
THERMAL_TEMP_INVALID initially, which needs to be handled by the core
as well.

When the driver has no way to update trip point temperatures, either
through a firmware notification or via ops->set_trip_temp(), then I
agree that registering them is not very useful if their temperatures
cannot be determined.
  
Daniel Lezcano Feb. 1, 2023, 7:27 p.m. UTC | #7
On 01/02/2023 19:47, Rafael J. Wysocki wrote:
> On Wed, Feb 1, 2023 at 11:42 AM Daniel Lezcano
> <daniel.lezcano@linaro.org> wrote:
>>
>> On 31/01/2023 20:11, Rafael J. Wysocki wrote:
>>> On Tue, Jan 31, 2023 at 5:41 PM Daniel Lezcano
>>> <daniel.lezcano@linaro.org> wrote:
>>>>
>>>> On 26/01/2023 15:15, Rafael J. Wysocki wrote:
>>>>> On Wed, Jan 18, 2023 at 7:16 PM Daniel Lezcano
>>>>> <daniel.lezcano@linaro.org> wrote:
>>>>>>
>>>>>> The thermal framework gives the possibility to register the trip
>>>>>> points with the thermal zone. When that is done, no get_trip_* ops are
>>>>>> needed and they can be removed.
>>>>>>
>>>>>> Convert ops content logic into generic trip points and register them with the
>>>>>> thermal zone.
>>>>>>
>>>>>> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
>>>>>> ---
>>>>
>>>> [ ... ]
>>>>
>>>>>> -       aux_entry->tzone = thermal_zone_device_register("quark_dts",
>>>>>> -                       QRK_MAX_DTS_TRIPS,
>>>>>> -                       wr_mask,
>>>>>> -                       aux_entry, &tzone_ops, NULL, 0, polling_delay);
>>>>>> +       err = get_trip_temp(QRK_DTS_ID_TP_CRITICAL, &temperature);
>>>>>> +       if (err)
>>>>>> +               goto err_ret;
>>>>>> +
>>>>>> +       aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].temperature = temperature;
>>>>>> +       aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].type = THERMAL_TRIP_CRITICAL;
>>>>>> +
>>>>>> +       err = get_trip_temp(QRK_DTS_ID_TP_HOT, &temperature);
>>>>>> +       if (err)
>>>>>> +               goto err_ret;
>>>>>
>>>>> If I'm not mistaken, this won't even try to register the thermal zone
>>>>> if at least one trip cannot be initialized, but previously it was
>>>>> registered in that case, but the trips that failed to respond were
>>>>> disabled.
>>>>>
>>>>> This is a change in behavior that would at least need to be documented
>>>>> in the changelog, but it isn't.
>>>>>
>>>>> I'm not sure if it is safe to make even, however.
>>>>
>>>> Thanks for catching this.
>>>>
>>>> Two solutions:
>>>>
>>>> 1. Set the temperature to THERMAL_TEMP_INVALID and change
>>>> get_thermal_trip() to return -EINVAL or -ERANGE if the temperature is
>>>> THERMAL_TEMP_INVALID
>>>>
>>>> 2. Register only the valid trip points.
>>>>
>>>> What would be the preferable way ?
>>>
>>> I think that the trip points that are registered currently need to
>>> still be registered after the change.
>>>
>>> Does registering a trip point with the temperature set to
>>> THERMAL_TEMP_INVALID cause it to be effectively disabled?
>>
>> The initial behavior before the changes is:
>>
>> The function thermal_zone_device_register() will go through all the trip
>> points and call thermal_zone_get_trip(), resulting in a call to
>> ops->get_trip_temp(). If the call fails, the trip point is tagged as
>> disabled and will stay in this state forever, so discarded in the trip
>> point crossed detection.
>>
>> That does not report an error and the trip point is showed in sysfs but
>> in a inconsistent state as it is actually disabled. Reading the trip
>> point will return an error or not, but it is in any case disabled in the
>> thermal framework. The userspace does not have the information about the
>> trip point being disabled, so showing it up regardless its state is
>> pointless and prone to confusion for the userspace.
>>
>> IMO, it would be more sane to register the trip points which are
>> actually valid, so invalid trip points are not showed up and does
>> prevent extra complexity in the thermal core to handle them.
> 
> Except when the trip point can be updated to become a valid one later,
> for example in response to a system configuration change.  That can
> happen to ACPI-provided trip points, for example.
> 
> I don't think that this is an issue for this particular driver, but
> the core needs to handle that case anyway.

Yes, but the point is the core code never handled that case.

If reading the trip point fails when registering the thermal zone (and this
is not related to our changes), the trip point is added to the disabled
trips bitmap, and then, whatever action is taken to validate the trip point,
it remains disabled for the thermal framework. There is no action to enable
it (unless I missed something).

> Moreover, there is the case when trip points only become relevant when
> their temperatures are set via ops->set_trip_temp() and they are
> THERMAL_TEMP_INVALID initially, which needs to be handled by the core
> either.

Ok, then I guess the simplest change is to assign THERMAL_TEMP_INVALID 
in this driver, if get_trip_temp fails at the initialization time.

Later we can add a thermal_zone_device_update_trips() with the needed 
locking and actions related to the update.

> When the driver has no way to update trip point temperatures, either
> through a firmware notification or via ops->set_trip_temp(), then I
> agree that registering them is not very useful if their temperatures
> cannot be determined.

+1

Thanks!
  
Rafael J. Wysocki Feb. 2, 2023, 10:32 a.m. UTC | #8
On Wed, Feb 1, 2023 at 8:27 PM Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
>
> On 01/02/2023 19:47, Rafael J. Wysocki wrote:
> > On Wed, Feb 1, 2023 at 11:42 AM Daniel Lezcano
> > <daniel.lezcano@linaro.org> wrote:
> >>
> >> On 31/01/2023 20:11, Rafael J. Wysocki wrote:
> >>> On Tue, Jan 31, 2023 at 5:41 PM Daniel Lezcano
> >>> <daniel.lezcano@linaro.org> wrote:
> >>>>
> >>>> On 26/01/2023 15:15, Rafael J. Wysocki wrote:
> >>>>> On Wed, Jan 18, 2023 at 7:16 PM Daniel Lezcano
> >>>>> <daniel.lezcano@linaro.org> wrote:
> >>>>>>
> >>>>>> The thermal framework gives the possibility to register the trip
> >>>>>> points with the thermal zone. When that is done, no get_trip_* ops are
> >>>>>> needed and they can be removed.
> >>>>>>
> >>>>>> Convert ops content logic into generic trip points and register them with the
> >>>>>> thermal zone.
> >>>>>>
> >>>>>> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
> >>>>>> ---
> >>>>
> >>>> [ ... ]
> >>>>
> >>>>>> -       aux_entry->tzone = thermal_zone_device_register("quark_dts",
> >>>>>> -                       QRK_MAX_DTS_TRIPS,
> >>>>>> -                       wr_mask,
> >>>>>> -                       aux_entry, &tzone_ops, NULL, 0, polling_delay);
> >>>>>> +       err = get_trip_temp(QRK_DTS_ID_TP_CRITICAL, &temperature);
> >>>>>> +       if (err)
> >>>>>> +               goto err_ret;
> >>>>>> +
> >>>>>> +       aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].temperature = temperature;
> >>>>>> +       aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].type = THERMAL_TRIP_CRITICAL;
> >>>>>> +
> >>>>>> +       err = get_trip_temp(QRK_DTS_ID_TP_HOT, &temperature);
> >>>>>> +       if (err)
> >>>>>> +               goto err_ret;
> >>>>>
> >>>>> If I'm not mistaken, this won't even try to register the thermal zone
> >>>>> if at least one trip cannot be initialized, but previously it was
> >>>>> registered in that case, but the trips that failed to respond were
> >>>>> disabled.
> >>>>>
> >>>>> This is a change in behavior that would at least need to be documented
> >>>>> in the changelog, but it isn't.
> >>>>>
> >>>>> I'm not sure if it is safe to make even, however.
> >>>>
> >>>> Thanks for catching this.
> >>>>
> >>>> Two solutions:
> >>>>
> >>>> 1. Set the temperature to THERMAL_TEMP_INVALID and change
> >>>> get_thermal_trip() to return -EINVAL or -ERANGE if the temperature is
> >>>> THERMAL_TEMP_INVALID
> >>>>
> >>>> 2. Register only the valid trip points.
> >>>>
> >>>> What would be the preferable way ?
> >>>
> >>> I think that the trip points that are registered currently need to
> >>> still be registered after the change.
> >>>
> >>> Does registering a trip point with the temperature set to
> >>> THERMAL_TEMP_INVALID cause it to be effectively disabled?
> >>
> >> The initial behavior before the changes is:
> >>
> >> The function thermal_zone_device_register() will go through all the trip
> >> points and call thermal_zone_get_trip(), resulting in a call to
> >> ops->get_trip_temp(). If the call fails, the trip point is tagged as
> >> disabled and will stay in this state forever, so discarded in the trip
> >> point crossed detection.
> >>
> >> That does not report an error and the trip point is showed in sysfs but
> >> in a inconsistent state as it is actually disabled. Reading the trip
> >> point will return an error or not, but it is in any case disabled in the
> >> thermal framework. The userspace does not have the information about the
> >> trip point being disabled, so showing it up regardless its state is
> >> pointless and prone to confusion for the userspace.
> >>
> >> IMO, it would be more sane to register the trip points which are
> >> actually valid, so invalid trip points are not showed up and does
> >> prevent extra complexity in the thermal core to handle them.
> >
> > Except when the trip point can be updated to become a valid one later,
> > for example in response to a system configuration change.  That can
> > happen to ACPI-provided trip points, for example.
> >
> > I don't think that this is an issue for this particular driver, but
> > the core needs to handle that case anyway.
>
> Yes, but the point is the core code never handled that case.

True.

What I wanted to say, though, is that the core needs to allow
registering trip points with THERMAL_TEMP_INVALID without disabling
them automatically, so they can be updated and used later.

> If the trip point fails when registering the thermal zone (and this is
> not related to our changes), the trip point is added to the disabled
> trips bitmap and then whatever the action to validate the trip point, it
> remains disabled for the thermal framework. There is no action to enable
> it (except I missed something).
>
> > Moreover, there is the case when trip points only become relevant when
> > their temperatures are set via ops->set_trip_temp() and they are
> > THERMAL_TEMP_INVALID initially, which needs to be handled by the core
> > either.
>
> Ok, then I guess the simplest change is to assign THERMAL_TEMP_INVALID
> in this driver, if get_trip_temp fails at the initialization time.
>
> Later we can add a thermal_zone_device_update_trips() with the needed
> locking and actions related to the update.

Well, there is thermal_zone_device_update() and one of the events it
is supposed to handle is THERMAL_TRIP_CHANGED, so I'm not sure how the
new interface would differ from it?
  
Daniel Lezcano Feb. 2, 2023, 1:31 p.m. UTC | #9
On 02/02/2023 11:32, Rafael J. Wysocki wrote:
> On Wed, Feb 1, 2023 at 8:27 PM Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
>>
>> On 01/02/2023 19:47, Rafael J. Wysocki wrote:
>>> On Wed, Feb 1, 2023 at 11:42 AM Daniel Lezcano
>>> <daniel.lezcano@linaro.org> wrote:
>>>>
>>>> On 31/01/2023 20:11, Rafael J. Wysocki wrote:
>>>>> On Tue, Jan 31, 2023 at 5:41 PM Daniel Lezcano
>>>>> <daniel.lezcano@linaro.org> wrote:
>>>>>>
>>>>>> On 26/01/2023 15:15, Rafael J. Wysocki wrote:
>>>>>>> On Wed, Jan 18, 2023 at 7:16 PM Daniel Lezcano
>>>>>>> <daniel.lezcano@linaro.org> wrote:
>>>>>>>>
>>>>>>>> The thermal framework gives the possibility to register the trip
>>>>>>>> points with the thermal zone. When that is done, no get_trip_* ops are
>>>>>>>> needed and they can be removed.
>>>>>>>>
>>>>>>>> Convert ops content logic into generic trip points and register them with the
>>>>>>>> thermal zone.
>>>>>>>>
>>>>>>>> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
>>>>>>>> ---
>>>>>>
>>>>>> [ ... ]
>>>>>>
>>>>>>>> -       aux_entry->tzone = thermal_zone_device_register("quark_dts",
>>>>>>>> -                       QRK_MAX_DTS_TRIPS,
>>>>>>>> -                       wr_mask,
>>>>>>>> -                       aux_entry, &tzone_ops, NULL, 0, polling_delay);
>>>>>>>> +       err = get_trip_temp(QRK_DTS_ID_TP_CRITICAL, &temperature);
>>>>>>>> +       if (err)
>>>>>>>> +               goto err_ret;
>>>>>>>> +
>>>>>>>> +       aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].temperature = temperature;
>>>>>>>> +       aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].type = THERMAL_TRIP_CRITICAL;
>>>>>>>> +
>>>>>>>> +       err = get_trip_temp(QRK_DTS_ID_TP_HOT, &temperature);
>>>>>>>> +       if (err)
>>>>>>>> +               goto err_ret;
>>>>>>>
>>>>>>> If I'm not mistaken, this won't even try to register the thermal zone
>>>>>>> if at least one trip cannot be initialized, but previously it was
>>>>>>> registered in that case, but the trips that failed to respond were
>>>>>>> disabled.
>>>>>>>
>>>>>>> This is a change in behavior that would at least need to be documented
>>>>>>> in the changelog, but it isn't.
>>>>>>>
>>>>>>> I'm not even sure if it is safe to make, however.
>>>>>>
>>>>>> Thanks for catching this.
>>>>>>
>>>>>> Two solutions:
>>>>>>
>>>>>> 1. Set the temperature to THERMAL_TEMP_INVALID and change
>>>>>> get_thermal_trip() to return -EINVAL or -ERANGE if the temperature is
>>>>>> THERMAL_TEMP_INVALID
>>>>>>
>>>>>> 2. Register only the valid trip points.
>>>>>>
>>>>>> What would be the preferable way ?
>>>>>
>>>>> I think that the trip points that are registered currently need to
>>>>> still be registered after the change.
>>>>>
>>>>> Does registering a trip point with the temperature set to
>>>>> THERMAL_TEMP_INVALID cause it to be effectively disabled?
>>>>
>>>> The initial behavior before the changes is:
>>>>
>>>> The function thermal_zone_device_register() will go through all the trip
>>>> points and call thermal_zone_get_trip(), resulting in a call to
>>>> ops->get_trip_temp(). If the call fails, the trip point is tagged as
>>>> disabled and will stay in this state forever, so discarded in the trip
>>>> point crossed detection.
>>>>
>>>> That does not report an error and the trip point is shown in sysfs but
>>>> in an inconsistent state as it is actually disabled. Reading the trip
>>>> point will return an error or not, but it is in any case disabled in the
>>>> thermal framework. The userspace does not have the information about the
>>>> trip point being disabled, so showing it regardless of its state is
>>>> pointless and prone to confusion for the userspace.
>>>>
>>>> IMO, it would be more sane to register only the trip points which are
>>>> actually valid, so invalid trip points do not show up, which avoids
>>>> extra complexity in the thermal core to handle them.
>>>
>>> Except when the trip point can be updated to become a valid one later,
>>> for example in response to a system configuration change.  That can
>>> happen to ACPI-provided trip points, for example.
>>>
>>> I don't think that this is an issue for this particular driver, but
>>> the core needs to handle that case anyway.
>>
>> Yes, but the point is the core code never handled that case.
> 
> True.
> 
> What I wanted to say, though, is that the core needs to allow
> registering trip points with THERMAL_TEMP_INVALID without disabling
> them automatically, so they can be updated and used later.

Ok, so it is fine with the current code AFAICT.

The handle_thermal_trip() functions are discarding trips with 
temperature below zero for hot and critical. The trip crossing detection 
won't happen with these values.

However PASSIVE and ACTIVE trip points are going through the throttling 
governor callback with a -273000 trip temperature. I suppose those very 
specific trip points initialized to THERMAL_TEMP_INVALID are not 
associated with a cooling device, right ?


>> If the trip point fails when registering the thermal zone (and this is
>> not related to our changes), the trip point is added to the disabled
>> trips bitmap and then whatever the action to validate the trip point, it
>> remains disabled for the thermal framework. There is no action to enable
>> it (unless I missed something).
>>
>>> Moreover, there is the case when trip points only become relevant when
>>> their temperatures are set via ops->set_trip_temp() and they are
>>> THERMAL_TEMP_INVALID initially, which needs to be handled by the core
>>> as well.
>>
>> Ok, then I guess the simplest change is to assign THERMAL_TEMP_INVALID
>> in this driver, if get_trip_temp fails at the initialization time.
>>
>> Later we can add a thermal_zone_device_update_trips() with the needed
>> locking and actions related to the update.
> 
> Well, there is thermal_zone_device_update() and one of the events it
> is supposed to handle is THERMAL_TRIP_CHANGED, so I'm not sure how the
> new interface would differ from it?

Yes, we may have to investigate if the event should trigger the update 
or the update should trigger the event.
  

Patch

diff --git a/drivers/thermal/intel/intel_quark_dts_thermal.c b/drivers/thermal/intel/intel_quark_dts_thermal.c
index 3eafc6b0e6c3..4e1d1799ec22 100644
--- a/drivers/thermal/intel/intel_quark_dts_thermal.c
+++ b/drivers/thermal/intel/intel_quark_dts_thermal.c
@@ -84,6 +84,7 @@ 
 #define QRK_DTS_MASK_TP_THRES		0xFF
 #define QRK_DTS_SHIFT_TP		8
 #define QRK_DTS_ID_TP_CRITICAL		0
+#define QRK_DTS_ID_TP_HOT		1
 #define QRK_DTS_SAFE_TP_THRES		105
 
 /* Thermal Sensor Register Lock */
@@ -104,6 +105,7 @@  struct soc_sensor_entry {
 	u32 store_ptps;
 	u32 store_dts_enable;
 	struct thermal_zone_device *tzone;
+	struct thermal_trip trips[QRK_MAX_DTS_TRIPS];
 };
 
 static struct soc_sensor_entry *soc_dts;
@@ -172,7 +174,7 @@  static int soc_dts_disable(struct thermal_zone_device *tzd)
 	return ret;
 }
 
-static int _get_trip_temp(int trip, int *temp)
+static int get_trip_temp(int trip, int *temp)
 {
 	int status;
 	u32 out;
@@ -197,17 +199,6 @@  static int _get_trip_temp(int trip, int *temp)
 	return 0;
 }
 
-static inline int sys_get_trip_temp(struct thermal_zone_device *tzd,
-				int trip, int *temp)
-{
-	return _get_trip_temp(trip, temp);
-}
-
-static inline int sys_get_crit_temp(struct thermal_zone_device *tzd, int *temp)
-{
-	return _get_trip_temp(QRK_DTS_ID_TP_CRITICAL, temp);
-}
-
 static int update_trip_temp(struct soc_sensor_entry *aux_entry,
 				int trip, int temp)
 {
@@ -262,17 +253,6 @@  static inline int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip,
 	return update_trip_temp(tzd->devdata, trip, temp);
 }
 
-static int sys_get_trip_type(struct thermal_zone_device *thermal,
-		int trip, enum thermal_trip_type *type)
-{
-	if (trip)
-		*type = THERMAL_TRIP_HOT;
-	else
-		*type = THERMAL_TRIP_CRITICAL;
-
-	return 0;
-}
-
 static int sys_get_curr_temp(struct thermal_zone_device *tzd,
 				int *temp)
 {
@@ -315,10 +295,7 @@  static int sys_change_mode(struct thermal_zone_device *tzd,
 
 static struct thermal_zone_device_ops tzone_ops = {
 	.get_temp = sys_get_curr_temp,
-	.get_trip_temp = sys_get_trip_temp,
-	.get_trip_type = sys_get_trip_type,
 	.set_trip_temp = sys_set_trip_temp,
-	.get_crit_temp = sys_get_crit_temp,
 	.change_mode = sys_change_mode,
 };
 
@@ -344,7 +321,7 @@  static void free_soc_dts(struct soc_sensor_entry *aux_entry)
 static struct soc_sensor_entry *alloc_soc_dts(void)
 {
 	struct soc_sensor_entry *aux_entry;
-	int err;
+	int err, temperature;
 	u32 out;
 	int wr_mask;
 
@@ -385,10 +362,27 @@  static struct soc_sensor_entry *alloc_soc_dts(void)
 			goto err_ret;
 	}
 
-	aux_entry->tzone = thermal_zone_device_register("quark_dts",
-			QRK_MAX_DTS_TRIPS,
-			wr_mask,
-			aux_entry, &tzone_ops, NULL, 0, polling_delay);
+	err = get_trip_temp(QRK_DTS_ID_TP_CRITICAL, &temperature);
+	if (err)
+		goto err_ret;
+
+	aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].temperature = temperature;
+	aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].type = THERMAL_TRIP_CRITICAL;
+
+	err = get_trip_temp(QRK_DTS_ID_TP_HOT, &temperature);
+	if (err)
+		goto err_ret;
+
+	aux_entry->trips[QRK_DTS_ID_TP_HOT].temperature = temperature;
+	aux_entry->trips[QRK_DTS_ID_TP_HOT].type = THERMAL_TRIP_HOT;
+
+	aux_entry->tzone =
+		thermal_zone_device_register_with_trips("quark_dts",
+							aux_entry->trips,
+							QRK_MAX_DTS_TRIPS,
+							wr_mask,
+							aux_entry, &tzone_ops,
+							NULL, 0, polling_delay);
 	if (IS_ERR(aux_entry->tzone)) {
 		err = PTR_ERR(aux_entry->tzone);
 		goto err_ret;