devcoredump: Remove devcoredump device if failing device is gone

Message ID 20240117195349.343083-1-rodrigo.vivi@intel.com
State New
Headers
Series devcoredump: Remove devcoredump device if failing device is gone |

Commit Message

Rodrigo Vivi Jan. 17, 2024, 7:53 p.m. UTC
  Make dev_coredumpm a real device managed helper, that not only
frees the device after a scheduled delay (DEVCD_TIMEOUT), but
also when the failing/crashed device is gone.

The module remove for the drivers using devcoredump are currently
broken if attempted between the crash and the DEVCD_TIMEOUT, since
the symbolic sysfs link won't be deleted.

On top of that, for PCI devices, the unbind of the device will
call the pci .remove void function, that cannot fail. At that
time, our device is pretty much gone, but the read and free
functions are alive trough the devcoredump device and they
can get some NULL dereferences or use after free.

So, if the failing-device is gone, let's cancel the scheduled
work and remove devcoredump-device immediately.

Cc: Jose Souza <jose.souza@intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/base/devcoredump.c | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)
  

Comments

Souza, Jose Jan. 19, 2024, 6:13 p.m. UTC | #1
On Wed, 2024-01-17 at 14:53 -0500, Rodrigo Vivi wrote:
> Make dev_coredumpm a real device managed helper, that not only
> frees the device after a scheduled delay (DEVCD_TIMEOUT), but
> also when the failing/crashed device is gone.
> 
> The module remove for the drivers using devcoredump are currently
> broken if attempted between the crash and the DEVCD_TIMEOUT, since
> the symbolic sysfs link won't be deleted.
> 
> On top of that, for PCI devices, the unbind of the device will
> call the pci .remove void function, that cannot fail. At that
> time, our device is pretty much gone, but the read and free
> functions are alive trough the devcoredump device and they
> can get some NULL dereferences or use after free.
> 
> So, if the failing-device is gone, let's cancel the scheduled
> work and remove devcoredump-device immediately.
> 
> Cc: Jose Souza <jose.souza@intel.com>
> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Cc: Johannes Berg <johannes@sipsolutions.net>
> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
> ---
>  drivers/base/devcoredump.c | 29 ++++++++++++++++++++++-------
>  1 file changed, 22 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c
> index 7e2d1f0d903a..6db7a2fd9a02 100644
> --- a/drivers/base/devcoredump.c
> +++ b/drivers/base/devcoredump.c
> @@ -8,6 +8,7 @@
>  #include <linux/module.h>
>  #include <linux/device.h>
>  #include <linux/devcoredump.h>
> +#include <linux/devm-helpers.h>
>  #include <linux/list.h>
>  #include <linux/slab.h>
>  #include <linux/fs.h>
> @@ -118,19 +119,24 @@ static ssize_t devcd_data_read(struct file *filp, struct kobject *kobj,
>  	return devcd->read(buffer, offset, count, devcd->data, devcd->datalen);
>  }
>  
> -static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,
> -				struct bin_attribute *bin_attr,
> -				char *buffer, loff_t offset, size_t count)
> +static void devcd_remove_now(struct devcd_entry *devcd)

this function can also be used by devcd_free().

Other than that LGTM.

>  {
> -	struct device *dev = kobj_to_dev(kobj);
> -	struct devcd_entry *devcd = dev_to_devcd(dev);
> -
>  	mutex_lock(&devcd->mutex);
>  	if (!devcd->delete_work) {
>  		devcd->delete_work = true;
>  		mod_delayed_work(system_wq, &devcd->del_wk, 0);
>  	}
>  	mutex_unlock(&devcd->mutex);
> +}
> +
> +static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,
> +				struct bin_attribute *bin_attr,
> +				char *buffer, loff_t offset, size_t count)
> +{
> +	struct device *dev = kobj_to_dev(kobj);
> +	struct devcd_entry *devcd = dev_to_devcd(dev);
> +
> +	devcd_remove_now(devcd);
>  
>  	return count;
>  }
> @@ -304,6 +310,12 @@ static ssize_t devcd_read_from_sgtable(char *buffer, loff_t offset,
>  				  offset);
>  }
>  
> +static void devcd_remove(void *data)
> +{
> +	struct devcd_entry *devcd = data;
> +	devcd_remove_now(devcd);
> +}
> +
>  /**
>   * dev_coredumpm - create device coredump with read/free methods
>   * @dev: the struct device for the crashed device
> @@ -379,7 +391,10 @@ void dev_coredumpm(struct device *dev, struct module *owner,
>  
>  	dev_set_uevent_suppress(&devcd->devcd_dev, false);
>  	kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD);
> -	INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
> +	if (devm_add_action(dev, devcd_remove, devcd))
> +		dev_warn(dev, "devcoredump managed auto-removal registration failed\n");
> +	if (devm_delayed_work_autocancel(dev, &devcd->del_wk, devcd_del))
> +		dev_warn(dev, "devcoredump managed autocancel work failed\n");
>  	schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT);
>  	mutex_unlock(&devcd->mutex);
>  	return;
  
Rodrigo Vivi Jan. 22, 2024, 9:24 p.m. UTC | #2
On Fri, Jan 19, 2024 at 01:13:45PM -0500, Souza, Jose wrote:
> On Wed, 2024-01-17 at 14:53 -0500, Rodrigo Vivi wrote:
> > Make dev_coredumpm a real device managed helper, that not only
> > frees the device after a scheduled delay (DEVCD_TIMEOUT), but
> > also when the failing/crashed device is gone.
> > 
> > The module remove for the drivers using devcoredump are currently
> > broken if attempted between the crash and the DEVCD_TIMEOUT, since
> > the symbolic sysfs link won't be deleted.
> > 
> > On top of that, for PCI devices, the unbind of the device will
> > call the pci .remove void function, that cannot fail. At that
> > time, our device is pretty much gone, but the read and free
> > functions are alive trough the devcoredump device and they
> > can get some NULL dereferences or use after free.
> > 
> > So, if the failing-device is gone, let's cancel the scheduled
> > work and remove devcoredump-device immediately.
> > 
> > Cc: Jose Souza <jose.souza@intel.com>
> > Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> > Cc: Johannes Berg <johannes@sipsolutions.net>
> > Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
> > ---
> >  drivers/base/devcoredump.c | 29 ++++++++++++++++++++++-------
> >  1 file changed, 22 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c
> > index 7e2d1f0d903a..6db7a2fd9a02 100644
> > --- a/drivers/base/devcoredump.c
> > +++ b/drivers/base/devcoredump.c
> > @@ -8,6 +8,7 @@
> >  #include <linux/module.h>
> >  #include <linux/device.h>
> >  #include <linux/devcoredump.h>
> > +#include <linux/devm-helpers.h>
> >  #include <linux/list.h>
> >  #include <linux/slab.h>
> >  #include <linux/fs.h>
> > @@ -118,19 +119,24 @@ static ssize_t devcd_data_read(struct file *filp, struct kobject *kobj,
> >  	return devcd->read(buffer, offset, count, devcd->data, devcd->datalen);
> >  }
> >  
> > -static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,
> > -				struct bin_attribute *bin_attr,
> > -				char *buffer, loff_t offset, size_t count)
> > +static void devcd_remove_now(struct devcd_entry *devcd)
> 
> this function can also be used by devcd_free().

well, indeed.
And perhaps using the

flush_delayed_work(&devcd->del_wk);

instead of

mod_delayed_work(system_wq, &devcd->del_wk, 0);

and then I don't even need to switch from
INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
to
devm_delayed_work_autocancel()

since it will be flushed, so no need to autocancel it.

Johannes, any hard preference or request from your side?

Thanks,
Rodrigo.

> 
> Other than that LGTM.
> 
> >  {
> > -	struct device *dev = kobj_to_dev(kobj);
> > -	struct devcd_entry *devcd = dev_to_devcd(dev);
> > -
> >  	mutex_lock(&devcd->mutex);
> >  	if (!devcd->delete_work) {
> >  		devcd->delete_work = true;
> >  		mod_delayed_work(system_wq, &devcd->del_wk, 0);
> >  	}
> >  	mutex_unlock(&devcd->mutex);
> > +}
> > +
> > +static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,
> > +				struct bin_attribute *bin_attr,
> > +				char *buffer, loff_t offset, size_t count)
> > +{
> > +	struct device *dev = kobj_to_dev(kobj);
> > +	struct devcd_entry *devcd = dev_to_devcd(dev);
> > +
> > +	devcd_remove_now(devcd);
> >  
> >  	return count;
> >  }
> > @@ -304,6 +310,12 @@ static ssize_t devcd_read_from_sgtable(char *buffer, loff_t offset,
> >  				  offset);
> >  }
> >  
> > +static void devcd_remove(void *data)
> > +{
> > +	struct devcd_entry *devcd = data;
> > +	devcd_remove_now(devcd);
> > +}
> > +
> >  /**
> >   * dev_coredumpm - create device coredump with read/free methods
> >   * @dev: the struct device for the crashed device
> > @@ -379,7 +391,10 @@ void dev_coredumpm(struct device *dev, struct module *owner,
> >  
> >  	dev_set_uevent_suppress(&devcd->devcd_dev, false);
> >  	kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD);
> > -	INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
> > +	if (devm_add_action(dev, devcd_remove, devcd))
> > +		dev_warn(dev, "devcoredump managed auto-removal registration failed\n");
> > +	if (devm_delayed_work_autocancel(dev, &devcd->del_wk, devcd_del))
> > +		dev_warn(dev, "devcoredump managed autocancel work failed\n");
> >  	schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT);
> >  	mutex_unlock(&devcd->mutex);
> >  	return;
>
  

Patch

diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c
index 7e2d1f0d903a..6db7a2fd9a02 100644
--- a/drivers/base/devcoredump.c
+++ b/drivers/base/devcoredump.c
@@ -8,6 +8,7 @@ 
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/devcoredump.h>
+#include <linux/devm-helpers.h>
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
@@ -118,19 +119,24 @@  static ssize_t devcd_data_read(struct file *filp, struct kobject *kobj,
 	return devcd->read(buffer, offset, count, devcd->data, devcd->datalen);
 }
 
-static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,
-				struct bin_attribute *bin_attr,
-				char *buffer, loff_t offset, size_t count)
+static void devcd_remove_now(struct devcd_entry *devcd)
 {
-	struct device *dev = kobj_to_dev(kobj);
-	struct devcd_entry *devcd = dev_to_devcd(dev);
-
 	mutex_lock(&devcd->mutex);
 	if (!devcd->delete_work) {
 		devcd->delete_work = true;
 		mod_delayed_work(system_wq, &devcd->del_wk, 0);
 	}
 	mutex_unlock(&devcd->mutex);
+}
+
+static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,
+				struct bin_attribute *bin_attr,
+				char *buffer, loff_t offset, size_t count)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct devcd_entry *devcd = dev_to_devcd(dev);
+
+	devcd_remove_now(devcd);
 
 	return count;
 }
@@ -304,6 +310,12 @@  static ssize_t devcd_read_from_sgtable(char *buffer, loff_t offset,
 				  offset);
 }
 
+static void devcd_remove(void *data)
+{
+	struct devcd_entry *devcd = data;
+	devcd_remove_now(devcd);
+}
+
 /**
  * dev_coredumpm - create device coredump with read/free methods
  * @dev: the struct device for the crashed device
@@ -379,7 +391,10 @@  void dev_coredumpm(struct device *dev, struct module *owner,
 
 	dev_set_uevent_suppress(&devcd->devcd_dev, false);
 	kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD);
-	INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
+	if (devm_add_action(dev, devcd_remove, devcd))
+		dev_warn(dev, "devcoredump managed auto-removal registration failed\n");
+	if (devm_delayed_work_autocancel(dev, &devcd->del_wk, devcd_del))
+		dev_warn(dev, "devcoredump managed autocancel work failed\n");
 	schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT);
 	mutex_unlock(&devcd->mutex);
 	return;