hwmon/coretemp: Simplify platform device antics

Message ID 898dbb76a54aae6ca58ceefcab9ab18beeee2fff.1668096928.git.robin.murphy@arm.com
State New
Headers
Series hwmon/coretemp: Simplify platform device antics |

Commit Message

Robin Murphy Nov. 10, 2022, 4:20 p.m. UTC
  Coretemp's vestigial platform driver is odd. All the real work is done
globally by the initcall and CPU hotplug notifiers, while the "driver"
effectively just wraps an allocation and the registration of the hwmon
interface in a long-winded round-trip through the driver core. The whole
logic of dynamically creating and destroying platform devices to bring
the interfaces up and down is fatally flawed right away, since it
assumes platform_device_add() will synchronously bind the driver and set
drvdata before it returns, thus results in a NULL dereference if
drivers_autoprobe is turned off for the platform bus. Furthermore, the
unusual approach of doing that from within a CPU hotplug notifier is
also problematic. It's already commented in the code that it deadlocks
suspend, but it also causes lockdep issues for other drivers or
subsystems which may want to legitimately register a CPU hotplug
notifier from a platform bus notifier.

All of these issues can be solved by ripping this questionable behaviour
out completely, simply tying the platform devices to the lifetime of the
module itself, and directly managing the hwmon interfaces from the
hotplug notifiers. There is a slight user-visible change in that
/sys/bus/platform/drivers/coretemp will no longer appear, and
/sys/devices/platform/coretemp.n will remain present if package n is
hotplugged off, but hwmon users should really only be looking for the
presence of the hwmon interfaces, whose behaviour remains unchanged.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---

I haven't been able to fully test hotplug since I only have a
single-socket Intel system to hand.

 drivers/hwmon/coretemp.c | 134 ++++++++++++++++++---------------------
 1 file changed, 61 insertions(+), 73 deletions(-)
  

Comments

Janusz Krzysztofik Nov. 10, 2022, 4:45 p.m. UTC | #1
Hi Robin,

On Thursday, 10 November 2022 17:20:25 CET Robin Murphy wrote:
> Coretemp's vestigial platform driver is odd. All the real work is done
> globally by the initcall and CPU hotplug notifiers, while the "driver"
> effectively just wraps an allocation and the registration of the hwmon
> interface in a long-winded round-trip through the driver core. The whole
> logic of dynamically creating and destroying platform devices to bring
> the interfaces up and down is fatally flawed right away, since it
> assumes platform_device_add() will synchronously bind the driver and set
> drvdata before it returns, thus results in a NULL dereference if
> drivers_autoprobe is turned off for the platform bus. Furthermore, the
> unusual approach of doing that from within a CPU hotplug notifier is
> also problematic. It's already commented in the code that it deadlocks
> suspend, but it also causes lockdep issues for other drivers or
> subsystems which may want to legitimately register a CPU hotplug
> notifier from a platform bus notifier.
> 
> All of these issues can be solved by ripping this questionable behaviour
> out completely, simply tying the platform devices to the lifetime of the
> module itself, and directly managing the hwmon interfaces from the
> hotplug notifiers. There is a slight user-visible change in that
> /sys/bus/platform/drivers/coretemp will no longer appear, and
> /sys/devices/platform/coretemp.n will remain present if package n is
> hotplugged off, but hwmon users should really only be looking for the
> presence of the hwmon interfaces, whose behaviour remains unchanged.
> 
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> ---
> 
> I haven't been able to fully test hotplug since I only have a
> single-socket Intel system to hand.

I'll give it a try on our intel-gfx-trybot, together with our former iommu 
workaround reverted.

Thanks,
Janusz

> 
>  drivers/hwmon/coretemp.c | 134 ++++++++++++++++++---------------------
>  1 file changed, 61 insertions(+), 73 deletions(-)
> 
> diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
> index 8bf32c6c85d9..9fa68a81625e 100644
> --- a/drivers/hwmon/coretemp.c
> +++ b/drivers/hwmon/coretemp.c
> @@ -543,66 +543,49 @@ static void coretemp_remove_core(struct platform_data 
*pdata, int indx)
>  		ida_free(&pdata->ida, indx - BASE_SYSFS_ATTR_NO);
>  }
>  
> -static int coretemp_probe(struct platform_device *pdev)
> +static int coretemp_device_add(int zoneid)
>  {
> -	struct device *dev = &pdev->dev;
> +	struct platform_device *pdev;
>  	struct platform_data *pdata;
> +	int err;
>  
>  	/* Initialize the per-zone data structures */
> -	pdata = devm_kzalloc(dev, sizeof(struct platform_data), 
GFP_KERNEL);
> +	pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
>  	if (!pdata)
>  		return -ENOMEM;
>  
> -	pdata->pkg_id = pdev->id;
> +	pdata->pkg_id = zoneid;
>  	ida_init(&pdata->ida);
> -	platform_set_drvdata(pdev, pdata);
> -
> -	pdata->hwmon_dev = devm_hwmon_device_register_with_groups(dev, 
DRVNAME,
> -								
  pdata, NULL);
> -	return PTR_ERR_OR_ZERO(pdata->hwmon_dev);
> -}
> -
> -static int coretemp_remove(struct platform_device *pdev)
> -{
> -	struct platform_data *pdata = platform_get_drvdata(pdev);
> -	int i;
> -
> -	for (i = MAX_CORE_DATA - 1; i >= 0; --i)
> -		if (pdata->core_data[i])
> -			coretemp_remove_core(pdata, i);
> -
> -	ida_destroy(&pdata->ida);
> -	return 0;
> -}
> -
> -static struct platform_driver coretemp_driver = {
> -	.driver = {
> -		.name = DRVNAME,
> -	},
> -	.probe = coretemp_probe,
> -	.remove = coretemp_remove,
> -};
> -
> -static struct platform_device *coretemp_device_add(unsigned int cpu)
> -{
> -	int err, zoneid = topology_logical_die_id(cpu);
> -	struct platform_device *pdev;
> -
> -	if (zoneid < 0)
> -		return ERR_PTR(-ENOMEM);
>  
>  	pdev = platform_device_alloc(DRVNAME, zoneid);
> -	if (!pdev)
> -		return ERR_PTR(-ENOMEM);
> -
> -	err = platform_device_add(pdev);
> -	if (err) {
> -		platform_device_put(pdev);
> -		return ERR_PTR(err);
> +	if (!pdev) {
> +		err = -ENOMEM;
> +		goto err_free_pdata;
>  	}
>  
> +	err = platform_device_add(pdev);
> +	if (err)
> +		goto err_put_dev;
> +
> +	platform_set_drvdata(pdev, pdata);
>  	zone_devices[zoneid] = pdev;
> -	return pdev;
> +	return 0;
> +
> +err_put_dev:
> +	platform_device_put(pdev);
> +err_free_pdata:
> +	kfree(pdata);
> +	return err;
> +}
> +
> +static void coretemp_device_remove(int zoneid)
> +{
> +	struct platform_device *pdev = zone_devices[zoneid];
> +	struct platform_data *pdata = platform_get_drvdata(pdev);
> +
> +	ida_destroy(&pdata->ida);
> +	kfree(pdata);
> +	platform_device_unregister(pdev);
>  }
>  
>  static int coretemp_cpu_online(unsigned int cpu)
> @@ -626,7 +609,10 @@ static int coretemp_cpu_online(unsigned int cpu)
>  	if (!cpu_has(c, X86_FEATURE_DTHERM))
>  		return -ENODEV;
>  
> -	if (!pdev) {
> +	pdata = platform_get_drvdata(pdev);
> +	if (!pdata->hwmon_dev) {
> +		struct device *hwmon;
> +
>  		/* Check the microcode version of the CPU */
>  		if (chk_ucode_version(cpu))
>  			return -EINVAL;
> @@ -637,9 +623,11 @@ static int coretemp_cpu_online(unsigned int cpu)
>  		 * online. So, initialize per-pkg data structures and
>  		 * then bring this core online.
>  		 */
> -		pdev = coretemp_device_add(cpu);
> -		if (IS_ERR(pdev))
> -			return PTR_ERR(pdev);
> +		hwmon = hwmon_device_register_with_groups(&pdev->dev, 
DRVNAME,
> +							  
pdata, NULL);
> +		if (IS_ERR(hwmon))
> +			return PTR_ERR(hwmon);
> +		pdata->hwmon_dev = hwmon;
>  
>  		/*
>  		 * Check whether pkgtemp support is available.
> @@ -649,7 +637,6 @@ static int coretemp_cpu_online(unsigned int cpu)
>  			coretemp_add_core(pdev, cpu, 1);
>  	}
>  
> -	pdata = platform_get_drvdata(pdev);
>  	/*
>  	 * Check whether a thread sibling is already online. If not add the
>  	 * interface for this CPU core.
> @@ -668,18 +655,14 @@ static int coretemp_cpu_offline(unsigned int cpu)
>  	struct temp_data *tdata;
>  	int i, indx = -1, target;
>  
> -	/*
> -	 * Don't execute this on suspend as the device remove locks
> -	 * up the machine.
> -	 */
> +	/* No need to tear down any interfaces for suspend */
>  	if (cpuhp_tasks_frozen)
>  		return 0;
>  
>  	/* If the physical CPU device does not exist, just return */
> -	if (!pdev)
> -		return 0;
> -
>  	pd = platform_get_drvdata(pdev);
> +	if (!pd->hwmon_dev)
> +		return 0;
>  
>  	for (i = 0; i < NUM_REAL_CORES; i++) {
>  		if (pd->cpu_map[i] == topology_core_id(cpu)) {
> @@ -711,13 +694,14 @@ static int coretemp_cpu_offline(unsigned int cpu)
>  	}
>  
>  	/*
> -	 * If all cores in this pkg are offline, remove the device. This
> -	 * will invoke the platform driver remove function, which cleans up
> -	 * the rest.
> +	 * If all cores in this pkg are offline, remove the interface.
>  	 */
> +	tdata = pd->core_data[PKG_SYSFS_ATTR_NO];
>  	if (cpumask_empty(&pd->cpumask)) {
> -		zone_devices[topology_logical_die_id(cpu)] = NULL;
> -		platform_device_unregister(pdev);
> +		if (tdata)
> +			coretemp_remove_core(pd, PKG_SYSFS_ATTR_NO);
> +		hwmon_device_unregister(pd->hwmon_dev);
> +		pd->hwmon_dev = NULL;
>  		return 0;
>  	}
>  
> @@ -725,7 +709,6 @@ static int coretemp_cpu_offline(unsigned int cpu)
>  	 * Check whether this core is the target for the package
>  	 * interface. We need to assign it to some other cpu.
>  	 */
> -	tdata = pd->core_data[PKG_SYSFS_ATTR_NO];
>  	if (tdata && tdata->cpu == cpu) {
>  		target = cpumask_first(&pd->cpumask);
>  		mutex_lock(&tdata->update_lock);
> @@ -744,7 +727,7 @@ static enum cpuhp_state coretemp_hp_online;
>  
>  static int __init coretemp_init(void)
>  {
> -	int err;
> +	int i, err;
>  
>  	/*
>  	 * CPUID.06H.EAX[0] indicates whether the CPU has thermal
> @@ -760,20 +743,22 @@ static int __init coretemp_init(void)
>  	if (!zone_devices)
>  		return -ENOMEM;
>  
> -	err = platform_driver_register(&coretemp_driver);
> -	if (err)
> -		goto outzone;
> +	for (i = 0; i < max_zones; i++) {
> +		err = coretemp_device_add(i);
> +		if (err)
> +			goto outzone;
> +	}
>  
>  	err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hwmon/
coretemp:online",
>  				coretemp_cpu_online, 
coretemp_cpu_offline);
>  	if (err < 0)
> -		goto outdrv;
> +		goto outzone;
>  	coretemp_hp_online = err;
>  	return 0;
>  
> -outdrv:
> -	platform_driver_unregister(&coretemp_driver);
>  outzone:
> +	while (i--)
> +		coretemp_device_remove(i);
>  	kfree(zone_devices);
>  	return err;
>  }
> @@ -781,8 +766,11 @@ module_init(coretemp_init)
>  
>  static void __exit coretemp_exit(void)
>  {
> +	int i;
> +
>  	cpuhp_remove_state(coretemp_hp_online);
> -	platform_driver_unregister(&coretemp_driver);
> +	for (i = 0; i < max_zones; i++)
> +		coretemp_device_remove(i);
>  	kfree(zone_devices);
>  }
>  module_exit(coretemp_exit)
>
  
Guenter Roeck Nov. 11, 2022, 9:37 p.m. UTC | #2
On Thu, Nov 10, 2022 at 04:20:25PM +0000, Robin Murphy wrote:
> Coretemp's vestigial platform driver is odd. All the real work is done
> globally by the initcall and CPU hotplug notifiers, while the "driver"
> effectively just wraps an allocation and the registration of the hwmon
> interface in a long-winded round-trip through the driver core. The whole
> logic of dynamically creating and destroying platform devices to bring
> the interfaces up and down is fatally flawed right away, since it
> assumes platform_device_add() will synchronously bind the driver and set
> drvdata before it returns, thus results in a NULL dereference if
> drivers_autoprobe is turned off for the platform bus. Furthermore, the
> unusual approach of doing that from within a CPU hotplug notifier is
> also problematic. It's already commented in the code that it deadlocks
> suspend, but it also causes lockdep issues for other drivers or
> subsystems which may want to legitimately register a CPU hotplug
> notifier from a platform bus notifier.
> 
> All of these issues can be solved by ripping this questionable behaviour
> out completely, simply tying the platform devices to the lifetime of the
> module itself, and directly managing the hwmon interfaces from the
> hotplug notifiers. There is a slight user-visible change in that
> /sys/bus/platform/drivers/coretemp will no longer appear, and
> /sys/devices/platform/coretemp.n will remain present if package n is
> hotplugged off, but hwmon users should really only be looking for the
> presence of the hwmon interfaces, whose behaviour remains unchanged.
> 
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> ---
> 
> I haven't been able to fully test hotplug since I only have a
> single-socket Intel system to hand.

Someone with access to hardware will have to validate this.

For both subject and description, please avoid terms like "antics",
"odd", or "questionable". Please describe the problem in neutral terms.

Thanks,
Guenter
  
Janusz Krzysztofik Nov. 14, 2022, 8:46 a.m. UTC | #3
On Thursday, 10 November 2022 17:45:50 CET Janusz Krzysztofik wrote:
> Hi Robin,
> 
> On Thursday, 10 November 2022 17:20:25 CET Robin Murphy wrote:
> > Coretemp's vestigial platform driver is odd. All the real work is done
> > globally by the initcall and CPU hotplug notifiers, while the "driver"
> > effectively just wraps an allocation and the registration of the hwmon
> > interface in a long-winded round-trip through the driver core. The whole
> > logic of dynamically creating and destroying platform devices to bring
> > the interfaces up and down is fatally flawed right away, since it
> > assumes platform_device_add() will synchronously bind the driver and set
> > drvdata before it returns, thus results in a NULL dereference if
> > drivers_autoprobe is turned off for the platform bus. Furthermore, the
> > unusual approach of doing that from within a CPU hotplug notifier is
> > also problematic. It's already commented in the code that it deadlocks
> > suspend, but it also causes lockdep issues for other drivers or
> > subsystems which may want to legitimately register a CPU hotplug
> > notifier from a platform bus notifier.
> > 
> > All of these issues can be solved by ripping this questionable behaviour
> > out completely, simply tying the platform devices to the lifetime of the
> > module itself, and directly managing the hwmon interfaces from the
> > hotplug notifiers. There is a slight user-visible change in that
> > /sys/bus/platform/drivers/coretemp will no longer appear, and
> > /sys/devices/platform/coretemp.n will remain present if package n is
> > hotplugged off, but hwmon users should really only be looking for the
> > presence of the hwmon interfaces, whose behaviour remains unchanged.
> > 
> > Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> > ---
> > 
> > I haven't been able to fully test hotplug since I only have a
> > single-socket Intel system to hand.
> 
> I'll give it a try on our intel-gfx-trybot, together with our former iommu 
> workaround reverted.

With our local iommu workaround reverted, 5 out of 37 test machines failed to 
boot cleanly, and 1 other machine failed to wake up cleanly from S3, all due 
to coretemp / iommu lockdep splat reported.
https://intel-gfx-ci.01.org/tree/drm-tip/Trybot_110806v1/bat-all.html?testfilter=@load%7Cs3-without-i915%7Cabort
https://intel-gfx-ci.01.org/tree/drm-tip/Trybot_110806v1/bat-adlp-6/boot0.txt
https://intel-gfx-ci.01.org/tree/drm-tip/Trybot_110806v1/fi-kbl-soraka/boot0.txt
https://intel-gfx-ci.01.org/tree/drm-tip/Trybot_110806v1/fi-kbl-x1275/boot0.txt
https://intel-gfx-ci.01.org/tree/drm-tip/Trybot_110806v1/fi-apl-guc/boot0.txt
https://intel-gfx-ci.01.org/tree/drm-tip/Trybot_110806v1/fi-skl-guc/boot0.txt
https://intel-gfx-ci.01.org/tree/drm-tip/Trybot_110806v1/bat-rpls-1/dmesg0.txt

With this coretemp patch on top, the lockdep splat was not reported on any 
machine.
https://intel-gfx-ci.01.org/tree/drm-tip/Trybot_110768v1/bat-all.html?testfilter=@load%7Cs3-without-i915%7Cabort

This patch seems to fix the coretemp / iommu lockdep splat issue.

Tested-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>


> 
> Thanks,
> Janusz
> 
> > 
> >  drivers/hwmon/coretemp.c | 134 ++++++++++++++++++---------------------
> >  1 file changed, 61 insertions(+), 73 deletions(-)
> > 
> > diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
> > index 8bf32c6c85d9..9fa68a81625e 100644
> > --- a/drivers/hwmon/coretemp.c
> > +++ b/drivers/hwmon/coretemp.c
> > @@ -543,66 +543,49 @@ static void coretemp_remove_core(struct platform_data 
> *pdata, int indx)
> >  		ida_free(&pdata->ida, indx - BASE_SYSFS_ATTR_NO);
> >  }
> >  
> > -static int coretemp_probe(struct platform_device *pdev)
> > +static int coretemp_device_add(int zoneid)
> >  {
> > -	struct device *dev = &pdev->dev;
> > +	struct platform_device *pdev;
> >  	struct platform_data *pdata;
> > +	int err;
> >  
> >  	/* Initialize the per-zone data structures */
> > -	pdata = devm_kzalloc(dev, sizeof(struct platform_data), 
> GFP_KERNEL);
> > +	pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
> >  	if (!pdata)
> >  		return -ENOMEM;
> >  
> > -	pdata->pkg_id = pdev->id;
> > +	pdata->pkg_id = zoneid;
> >  	ida_init(&pdata->ida);
> > -	platform_set_drvdata(pdev, pdata);
> > -
> > -	pdata->hwmon_dev = devm_hwmon_device_register_with_groups(dev, 
> DRVNAME,
> > -								
>   pdata, NULL);
> > -	return PTR_ERR_OR_ZERO(pdata->hwmon_dev);
> > -}
> > -
> > -static int coretemp_remove(struct platform_device *pdev)
> > -{
> > -	struct platform_data *pdata = platform_get_drvdata(pdev);
> > -	int i;
> > -
> > -	for (i = MAX_CORE_DATA - 1; i >= 0; --i)
> > -		if (pdata->core_data[i])
> > -			coretemp_remove_core(pdata, i);
> > -
> > -	ida_destroy(&pdata->ida);
> > -	return 0;
> > -}
> > -
> > -static struct platform_driver coretemp_driver = {
> > -	.driver = {
> > -		.name = DRVNAME,
> > -	},
> > -	.probe = coretemp_probe,
> > -	.remove = coretemp_remove,
> > -};
> > -
> > -static struct platform_device *coretemp_device_add(unsigned int cpu)
> > -{
> > -	int err, zoneid = topology_logical_die_id(cpu);
> > -	struct platform_device *pdev;
> > -
> > -	if (zoneid < 0)
> > -		return ERR_PTR(-ENOMEM);
> >  
> >  	pdev = platform_device_alloc(DRVNAME, zoneid);
> > -	if (!pdev)
> > -		return ERR_PTR(-ENOMEM);
> > -
> > -	err = platform_device_add(pdev);
> > -	if (err) {
> > -		platform_device_put(pdev);
> > -		return ERR_PTR(err);
> > +	if (!pdev) {
> > +		err = -ENOMEM;
> > +		goto err_free_pdata;
> >  	}
> >  
> > +	err = platform_device_add(pdev);
> > +	if (err)
> > +		goto err_put_dev;
> > +
> > +	platform_set_drvdata(pdev, pdata);
> >  	zone_devices[zoneid] = pdev;
> > -	return pdev;
> > +	return 0;
> > +
> > +err_put_dev:
> > +	platform_device_put(pdev);
> > +err_free_pdata:
> > +	kfree(pdata);
> > +	return err;
> > +}
> > +
> > +static void coretemp_device_remove(int zoneid)
> > +{
> > +	struct platform_device *pdev = zone_devices[zoneid];
> > +	struct platform_data *pdata = platform_get_drvdata(pdev);
> > +
> > +	ida_destroy(&pdata->ida);
> > +	kfree(pdata);
> > +	platform_device_unregister(pdev);
> >  }
> >  
> >  static int coretemp_cpu_online(unsigned int cpu)
> > @@ -626,7 +609,10 @@ static int coretemp_cpu_online(unsigned int cpu)
> >  	if (!cpu_has(c, X86_FEATURE_DTHERM))
> >  		return -ENODEV;
> >  
> > -	if (!pdev) {
> > +	pdata = platform_get_drvdata(pdev);
> > +	if (!pdata->hwmon_dev) {
> > +		struct device *hwmon;
> > +
> >  		/* Check the microcode version of the CPU */
> >  		if (chk_ucode_version(cpu))
> >  			return -EINVAL;
> > @@ -637,9 +623,11 @@ static int coretemp_cpu_online(unsigned int cpu)
> >  		 * online. So, initialize per-pkg data structures and
> >  		 * then bring this core online.
> >  		 */
> > -		pdev = coretemp_device_add(cpu);
> > -		if (IS_ERR(pdev))
> > -			return PTR_ERR(pdev);
> > +		hwmon = hwmon_device_register_with_groups(&pdev->dev, 
> DRVNAME,
> > +							  
> pdata, NULL);
> > +		if (IS_ERR(hwmon))
> > +			return PTR_ERR(hwmon);
> > +		pdata->hwmon_dev = hwmon;
> >  
> >  		/*
> >  		 * Check whether pkgtemp support is available.
> > @@ -649,7 +637,6 @@ static int coretemp_cpu_online(unsigned int cpu)
> >  			coretemp_add_core(pdev, cpu, 1);
> >  	}
> >  
> > -	pdata = platform_get_drvdata(pdev);
> >  	/*
> >  	 * Check whether a thread sibling is already online. If not add the
> >  	 * interface for this CPU core.
> > @@ -668,18 +655,14 @@ static int coretemp_cpu_offline(unsigned int cpu)
> >  	struct temp_data *tdata;
> >  	int i, indx = -1, target;
> >  
> > -	/*
> > -	 * Don't execute this on suspend as the device remove locks
> > -	 * up the machine.
> > -	 */
> > +	/* No need to tear down any interfaces for suspend */
> >  	if (cpuhp_tasks_frozen)
> >  		return 0;
> >  
> >  	/* If the physical CPU device does not exist, just return */
> > -	if (!pdev)
> > -		return 0;
> > -
> >  	pd = platform_get_drvdata(pdev);
> > +	if (!pd->hwmon_dev)
> > +		return 0;
> >  
> >  	for (i = 0; i < NUM_REAL_CORES; i++) {
> >  		if (pd->cpu_map[i] == topology_core_id(cpu)) {
> > @@ -711,13 +694,14 @@ static int coretemp_cpu_offline(unsigned int cpu)
> >  	}
> >  
> >  	/*
> > -	 * If all cores in this pkg are offline, remove the device. This
> > -	 * will invoke the platform driver remove function, which cleans up
> > -	 * the rest.
> > +	 * If all cores in this pkg are offline, remove the interface.
> >  	 */
> > +	tdata = pd->core_data[PKG_SYSFS_ATTR_NO];
> >  	if (cpumask_empty(&pd->cpumask)) {
> > -		zone_devices[topology_logical_die_id(cpu)] = NULL;
> > -		platform_device_unregister(pdev);
> > +		if (tdata)
> > +			coretemp_remove_core(pd, PKG_SYSFS_ATTR_NO);
> > +		hwmon_device_unregister(pd->hwmon_dev);
> > +		pd->hwmon_dev = NULL;
> >  		return 0;
> >  	}
> >  
> > @@ -725,7 +709,6 @@ static int coretemp_cpu_offline(unsigned int cpu)
> >  	 * Check whether this core is the target for the package
> >  	 * interface. We need to assign it to some other cpu.
> >  	 */
> > -	tdata = pd->core_data[PKG_SYSFS_ATTR_NO];
> >  	if (tdata && tdata->cpu == cpu) {
> >  		target = cpumask_first(&pd->cpumask);
> >  		mutex_lock(&tdata->update_lock);
> > @@ -744,7 +727,7 @@ static enum cpuhp_state coretemp_hp_online;
> >  
> >  static int __init coretemp_init(void)
> >  {
> > -	int err;
> > +	int i, err;
> >  
> >  	/*
> >  	 * CPUID.06H.EAX[0] indicates whether the CPU has thermal
> > @@ -760,20 +743,22 @@ static int __init coretemp_init(void)
> >  	if (!zone_devices)
> >  		return -ENOMEM;
> >  
> > -	err = platform_driver_register(&coretemp_driver);
> > -	if (err)
> > -		goto outzone;
> > +	for (i = 0; i < max_zones; i++) {
> > +		err = coretemp_device_add(i);
> > +		if (err)
> > +			goto outzone;
> > +	}
> >  
> >  	err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hwmon/
> coretemp:online",
> >  				coretemp_cpu_online, 
> coretemp_cpu_offline);
> >  	if (err < 0)
> > -		goto outdrv;
> > +		goto outzone;
> >  	coretemp_hp_online = err;
> >  	return 0;
> >  
> > -outdrv:
> > -	platform_driver_unregister(&coretemp_driver);
> >  outzone:
> > +	while (i--)
> > +		coretemp_device_remove(i);
> >  	kfree(zone_devices);
> >  	return err;
> >  }
> > @@ -781,8 +766,11 @@ module_init(coretemp_init)
> >  
> >  static void __exit coretemp_exit(void)
> >  {
> > +	int i;
> > +
> >  	cpuhp_remove_state(coretemp_hp_online);
> > -	platform_driver_unregister(&coretemp_driver);
> > +	for (i = 0; i < max_zones; i++)
> > +		coretemp_device_remove(i);
> >  	kfree(zone_devices);
> >  }
> >  module_exit(coretemp_exit)
> > 
> 
>
  
Linux regression tracking (Thorsten Leemhuis) Nov. 15, 2022, 10:14 a.m. UTC | #4
On 10.11.22 17:20, Robin Murphy wrote:
> Coretemp's vestigial platform driver is odd. All the real work is done
> globally by the initcall and CPU hotplug notifiers, while the "driver"
> effectively just wraps an allocation and the registration of the hwmon
> interface in a long-winded round-trip through the driver core. The whole
> logic of dynamically creating and destroying platform devices to bring
> the interfaces up and down is fatally flawed right away, since it
> assumes platform_device_add() will synchronously bind the driver and set
> drvdata before it returns, thus results in a NULL dereference if
> drivers_autoprobe is turned off for the platform bus. Furthermore, the
> unusual approach of doing that from within a CPU hotplug notifier is
> also problematic. It's already commented in the code that it deadlocks
> suspend, but it also causes lockdep issues for other drivers or
> subsystems which may want to legitimately register a CPU hotplug
> notifier from a platform bus notifier.
> 
> All of these issues can be solved by ripping this questionable behaviour
> out completely, simply tying the platform devices to the lifetime of the
> module itself, and directly managing the hwmon interfaces from the
> hotplug notifiers. There is a slight user-visible change in that
> /sys/bus/platform/drivers/coretemp will no longer appear, and
> /sys/devices/platform/coretemp.n will remain present if package n is
> hotplugged off, but hwmon users should really only be looking for the
> presence of the hwmon interfaces, whose behaviour remains unchanged.
> 
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>

For future archeologists and the regression tracking this afaics should
have the following tags, as outlined by the documentation[1].

Link:
https://lore.kernel.org/lkml/20220922101036.87457-1-janusz.krzysztofik@linux.intel.com/
Link: https://gitlab.freedesktop.org/drm/intel/issues/6641

Any maybe a reported-by for Janusz, too.

Ciao, Thorsten

[1] see Documentation/process/submitting-patches.rst
(http://docs.kernel.org/process/submitting-patches.html) and
Documentation/process/5.Posting.rst
(https://docs.kernel.org/process/5.Posting.html)

P.S.:

#regzbot ^backmonitor:
https://lore.kernel.org/lkml/20220922101036.87457-1-janusz.krzysztofik@linux.intel.com/
  
Linux regression tracking (Thorsten Leemhuis) Dec. 1, 2022, 3:02 p.m. UTC | #5
Hi, this is your Linux kernel regression tracker. I wondered what
happened to this regression fix, as things looks stalled from here --
but maybe I'm missing something.

On 11.11.22 22:37, Guenter Roeck wrote:
> On Thu, Nov 10, 2022 at 04:20:25PM +0000, Robin Murphy wrote:
>> Coretemp's vestigial platform driver is odd. All the real work is done
>> globally by the initcall and CPU hotplug notifiers, while the "driver"
>> effectively just wraps an allocation and the registration of the hwmon
>> interface in a long-winded round-trip through the driver core. The whole
>> logic of dynamically creating and destroying platform devices to bring
>> the interfaces up and down is fatally flawed right away, since it
>> notifier from a platform bus notifier.
> [...]
>> I haven't been able to fully test hotplug since I only have a
>> single-socket Intel system to hand.
> Someone with access to hardware will have to validate this.

That afaics has happened.

> For both subject and description, please avoid terms like "antics",
> "odd", or "questionable". Please describe the problem in neutral terms.

Robin, did you take care of that?

Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)

P.S.: As the Linux kernel's regression tracker I deal with a lot of
reports and sometimes miss something important when writing mails like
this. If that's the case here, don't hesitate to tell me in a public
reply, it's in everyone's interest to set the public record straight.

#regzbot ignore-activity
  
Linux regression tracking (Thorsten Leemhuis) Dec. 6, 2022, 5:54 a.m. UTC | #6
On 01.12.22 16:02, Thorsten Leemhuis wrote:
> Hi, this is your Linux kernel regression tracker. I wondered what
> happened to this regression fix, as things looks stalled from here --
> but maybe I'm missing something.

This still looks stalled; hope someone will tell me if I missed any
progress.

> On 11.11.22 22:37, Guenter Roeck wrote:
>> On Thu, Nov 10, 2022 at 04:20:25PM +0000, Robin Murphy wrote:
>>> Coretemp's vestigial platform driver is odd. All the real work is done
>>> globally by the initcall and CPU hotplug notifiers, while the "driver"
>>> effectively just wraps an allocation and the registration of the hwmon
>>> interface in a long-winded round-trip through the driver core. The whole
>>> logic of dynamically creating and destroying platform devices to bring
>>> the interfaces up and down is fatally flawed right away, since it
>>> notifier from a platform bus notifier.
>> [...]
>>> I haven't been able to fully test hotplug since I only have a
>>> single-socket Intel system to hand.
>> Someone with access to hardware will have to validate this.
> That afaics has happened.
> 
>> For both subject and description, please avoid terms like "antics",
>> "odd", or "questionable". Please describe the problem in neutral terms.
> Robin, did you take care of that?

Robin, are you busy, or is there some other reason why this regression
fix afaics isn't making any progress?

Janusz Krzysztofik, as you seemed to care: if you have a minute and
Robin doesn't show up within a day or wo, could you maybe improve
subject and description to make Guenter happy?

Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)

P.S.: As the Linux kernel's regression tracker I deal with a lot of
reports and sometimes miss something important when writing mails like
this. If that's the case here, don't hesitate to tell me in a public
reply, it's in everyone's interest to set the public record straight.

#regzbot poke
  

Patch

diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 8bf32c6c85d9..9fa68a81625e 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -543,66 +543,49 @@  static void coretemp_remove_core(struct platform_data *pdata, int indx)
 		ida_free(&pdata->ida, indx - BASE_SYSFS_ATTR_NO);
 }
 
-static int coretemp_probe(struct platform_device *pdev)
+static int coretemp_device_add(int zoneid)
 {
-	struct device *dev = &pdev->dev;
+	struct platform_device *pdev;
 	struct platform_data *pdata;
+	int err;
 
 	/* Initialize the per-zone data structures */
-	pdata = devm_kzalloc(dev, sizeof(struct platform_data), GFP_KERNEL);
+	pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
 	if (!pdata)
 		return -ENOMEM;
 
-	pdata->pkg_id = pdev->id;
+	pdata->pkg_id = zoneid;
 	ida_init(&pdata->ida);
-	platform_set_drvdata(pdev, pdata);
-
-	pdata->hwmon_dev = devm_hwmon_device_register_with_groups(dev, DRVNAME,
-								  pdata, NULL);
-	return PTR_ERR_OR_ZERO(pdata->hwmon_dev);
-}
-
-static int coretemp_remove(struct platform_device *pdev)
-{
-	struct platform_data *pdata = platform_get_drvdata(pdev);
-	int i;
-
-	for (i = MAX_CORE_DATA - 1; i >= 0; --i)
-		if (pdata->core_data[i])
-			coretemp_remove_core(pdata, i);
-
-	ida_destroy(&pdata->ida);
-	return 0;
-}
-
-static struct platform_driver coretemp_driver = {
-	.driver = {
-		.name = DRVNAME,
-	},
-	.probe = coretemp_probe,
-	.remove = coretemp_remove,
-};
-
-static struct platform_device *coretemp_device_add(unsigned int cpu)
-{
-	int err, zoneid = topology_logical_die_id(cpu);
-	struct platform_device *pdev;
-
-	if (zoneid < 0)
-		return ERR_PTR(-ENOMEM);
 
 	pdev = platform_device_alloc(DRVNAME, zoneid);
-	if (!pdev)
-		return ERR_PTR(-ENOMEM);
-
-	err = platform_device_add(pdev);
-	if (err) {
-		platform_device_put(pdev);
-		return ERR_PTR(err);
+	if (!pdev) {
+		err = -ENOMEM;
+		goto err_free_pdata;
 	}
 
+	err = platform_device_add(pdev);
+	if (err)
+		goto err_put_dev;
+
+	platform_set_drvdata(pdev, pdata);
 	zone_devices[zoneid] = pdev;
-	return pdev;
+	return 0;
+
+err_put_dev:
+	platform_device_put(pdev);
+err_free_pdata:
+	kfree(pdata);
+	return err;
+}
+
+static void coretemp_device_remove(int zoneid)
+{
+	struct platform_device *pdev = zone_devices[zoneid];
+	struct platform_data *pdata = platform_get_drvdata(pdev);
+
+	ida_destroy(&pdata->ida);
+	kfree(pdata);
+	platform_device_unregister(pdev);
 }
 
 static int coretemp_cpu_online(unsigned int cpu)
@@ -626,7 +609,10 @@  static int coretemp_cpu_online(unsigned int cpu)
 	if (!cpu_has(c, X86_FEATURE_DTHERM))
 		return -ENODEV;
 
-	if (!pdev) {
+	pdata = platform_get_drvdata(pdev);
+	if (!pdata->hwmon_dev) {
+		struct device *hwmon;
+
 		/* Check the microcode version of the CPU */
 		if (chk_ucode_version(cpu))
 			return -EINVAL;
@@ -637,9 +623,11 @@  static int coretemp_cpu_online(unsigned int cpu)
 		 * online. So, initialize per-pkg data structures and
 		 * then bring this core online.
 		 */
-		pdev = coretemp_device_add(cpu);
-		if (IS_ERR(pdev))
-			return PTR_ERR(pdev);
+		hwmon = hwmon_device_register_with_groups(&pdev->dev, DRVNAME,
+							  pdata, NULL);
+		if (IS_ERR(hwmon))
+			return PTR_ERR(hwmon);
+		pdata->hwmon_dev = hwmon;
 
 		/*
 		 * Check whether pkgtemp support is available.
@@ -649,7 +637,6 @@  static int coretemp_cpu_online(unsigned int cpu)
 			coretemp_add_core(pdev, cpu, 1);
 	}
 
-	pdata = platform_get_drvdata(pdev);
 	/*
 	 * Check whether a thread sibling is already online. If not add the
 	 * interface for this CPU core.
@@ -668,18 +655,14 @@  static int coretemp_cpu_offline(unsigned int cpu)
 	struct temp_data *tdata;
 	int i, indx = -1, target;
 
-	/*
-	 * Don't execute this on suspend as the device remove locks
-	 * up the machine.
-	 */
+	/* No need to tear down any interfaces for suspend */
 	if (cpuhp_tasks_frozen)
 		return 0;
 
 	/* If the physical CPU device does not exist, just return */
-	if (!pdev)
-		return 0;
-
 	pd = platform_get_drvdata(pdev);
+	if (!pd->hwmon_dev)
+		return 0;
 
 	for (i = 0; i < NUM_REAL_CORES; i++) {
 		if (pd->cpu_map[i] == topology_core_id(cpu)) {
@@ -711,13 +694,14 @@  static int coretemp_cpu_offline(unsigned int cpu)
 	}
 
 	/*
-	 * If all cores in this pkg are offline, remove the device. This
-	 * will invoke the platform driver remove function, which cleans up
-	 * the rest.
+	 * If all cores in this pkg are offline, remove the interface.
 	 */
+	tdata = pd->core_data[PKG_SYSFS_ATTR_NO];
 	if (cpumask_empty(&pd->cpumask)) {
-		zone_devices[topology_logical_die_id(cpu)] = NULL;
-		platform_device_unregister(pdev);
+		if (tdata)
+			coretemp_remove_core(pd, PKG_SYSFS_ATTR_NO);
+		hwmon_device_unregister(pd->hwmon_dev);
+		pd->hwmon_dev = NULL;
 		return 0;
 	}
 
@@ -725,7 +709,6 @@  static int coretemp_cpu_offline(unsigned int cpu)
 	 * Check whether this core is the target for the package
 	 * interface. We need to assign it to some other cpu.
 	 */
-	tdata = pd->core_data[PKG_SYSFS_ATTR_NO];
 	if (tdata && tdata->cpu == cpu) {
 		target = cpumask_first(&pd->cpumask);
 		mutex_lock(&tdata->update_lock);
@@ -744,7 +727,7 @@  static enum cpuhp_state coretemp_hp_online;
 
 static int __init coretemp_init(void)
 {
-	int err;
+	int i, err;
 
 	/*
 	 * CPUID.06H.EAX[0] indicates whether the CPU has thermal
@@ -760,20 +743,22 @@  static int __init coretemp_init(void)
 	if (!zone_devices)
 		return -ENOMEM;
 
-	err = platform_driver_register(&coretemp_driver);
-	if (err)
-		goto outzone;
+	for (i = 0; i < max_zones; i++) {
+		err = coretemp_device_add(i);
+		if (err)
+			goto outzone;
+	}
 
 	err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hwmon/coretemp:online",
 				coretemp_cpu_online, coretemp_cpu_offline);
 	if (err < 0)
-		goto outdrv;
+		goto outzone;
 	coretemp_hp_online = err;
 	return 0;
 
-outdrv:
-	platform_driver_unregister(&coretemp_driver);
 outzone:
+	while (i--)
+		coretemp_device_remove(i);
 	kfree(zone_devices);
 	return err;
 }
@@ -781,8 +766,11 @@  module_init(coretemp_init)
 
 static void __exit coretemp_exit(void)
 {
+	int i;
+
 	cpuhp_remove_state(coretemp_hp_online);
-	platform_driver_unregister(&coretemp_driver);
+	for (i = 0; i < max_zones; i++)
+		coretemp_device_remove(i);
 	kfree(zone_devices);
 }
 module_exit(coretemp_exit)