[1/2] mtd: use refcount to prevent corruption

Message ID 20230620131905.648089-2-alexander.usyskin@intel.com
State New
Headers
Series mtd: prepare for dynamically removed devices |

Commit Message

Usyskin, Alexander June 20, 2023, 1:19 p.m. UTC
  From: Tomas Winkler <tomas.winkler@intel.com>

When underlying device is removed mtd core will crash
in case user space is holding open handle.
Need to use proper refcounting so device is release
only when has no users.

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
---
 drivers/mtd/mtdcore.c   | 72 ++++++++++++++++++++++-------------------
 drivers/mtd/mtdcore.h   |  1 +
 drivers/mtd/mtdpart.c   | 14 ++++----
 include/linux/mtd/mtd.h |  2 +-
 4 files changed, 49 insertions(+), 40 deletions(-)
  

Comments

Miquel Raynal July 12, 2023, 2:14 p.m. UTC | #1
On Tue, 2023-06-20 at 13:19:04 UTC, Alexander Usyskin wrote:
> From: Tomas Winkler <tomas.winkler@intel.com>
> 
> When underlying device is removed mtd core will crash
> in case user space is holding open handle.
> Need to use proper refcounting so device is release
> only when has no users.
> 
> Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
> Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>

Applied to https://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git mtd/next, thanks.

Miquel
  
Fabrizio Castro July 14, 2023, 4:10 p.m. UTC | #2
Dear All,

I am sorry for reopening this topic, but as it turns out (after bisecting
linux-next/master) this patch is interfering with a use case I am working
on.

I am using a Renesas RZ/V2M EVK v2.0 platform, I have an SPI NOR memory
("micron,mt25ql256a") wired up to a connector on the platform, the SPI
master is using driver (built as module):
drivers/spi/spi-rzv2m-csi.c

Although the board device tree in mainline does not reflect the connection
of CSI4 (which is the SPI master) from the SoC to the "micron,mt25ql256a"
(SPI slave device), my local device tree comes with the necessary definitions.

Without this patch, when I load up the module, I get the below 3 devices:
/dev/mtd0
/dev/mtd0ro
/dev/mtdblock0

They get cleaned up correctly upon module removal.
I can reload the same module, and everything works just fine.

With this patch applied, when I load up the module, I get the same 3
devices:
/dev/mtd0
/dev/mtd0ro
/dev/mtdblock0

Upon removal, the below 2 devices still hang around:
/dev/mtd0
/dev/mtd0ro

Preventing the module from being (re)loaded correctly:
rzv2m_csi a4020200.spi: error -EBUSY: register controller failed
rzv2m_csi: probe of a4020200.spi failed with error -16

Are you guys aware of this sort of side effect?

Thanks,
Fab

> From: Alexander Usyskin <alexander.usyskin@intel.com>
> Subject: [PATCH 1/2] mtd: use refcount to prevent corruption
> 
> From: Tomas Winkler <tomas.winkler@intel.com>
> 
> When underlying device is removed mtd core will crash
> in case user space is holding open handle.
> Need to use proper refcounting so device is release
> only when has no users.
> 
> Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
> Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
> ---
>  drivers/mtd/mtdcore.c   | 72 ++++++++++++++++++++++------------------
> -
>  drivers/mtd/mtdcore.h   |  1 +
>  drivers/mtd/mtdpart.c   | 14 ++++----
>  include/linux/mtd/mtd.h |  2 +-
>  4 files changed, 49 insertions(+), 40 deletions(-)
> 
> diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
> index abf4cb58a8ab..84bd1878367d 100644
> --- a/drivers/mtd/mtdcore.c
> +++ b/drivers/mtd/mtdcore.c
> @@ -93,10 +93,33 @@ static void mtd_release(struct device *dev)
>  	struct mtd_info *mtd = dev_get_drvdata(dev);
>  	dev_t index = MTD_DEVT(mtd->index);
> 
> +	if (mtd_is_partition(mtd))
> +		release_mtd_partition(mtd);
> +
>  	/* remove /dev/mtdXro node */
>  	device_destroy(&mtd_class, index + 1);
>  }
> 
> +static void mtd_device_release(struct kref *kref)
> +{
> +	struct mtd_info *mtd = container_of(kref, struct mtd_info,
> refcnt);
> +
> +	debugfs_remove_recursive(mtd->dbg.dfs_dir);
> +
> +	/* Try to remove the NVMEM provider */
> +	nvmem_unregister(mtd->nvmem);
> +
> +	device_unregister(&mtd->dev);
> +
> +	/* Clear dev so mtd can be safely re-registered later if desired
> */
> +	memset(&mtd->dev, 0, sizeof(mtd->dev));
> +
> +	idr_remove(&mtd_idr, mtd->index);
> +	of_node_put(mtd_get_of_node(mtd));
> +
> +	module_put(THIS_MODULE);
> +}
> +
>  #define MTD_DEVICE_ATTR_RO(name) \
>  static DEVICE_ATTR(name, 0444, mtd_##name##_show, NULL)
> 
> @@ -666,7 +689,7 @@ int add_mtd_device(struct mtd_info *mtd)
>  	}
> 
>  	mtd->index = i;
> -	mtd->usecount = 0;
> +	kref_init(&mtd->refcnt);
> 
>  	/* default value if not set by driver */
>  	if (mtd->bitflip_threshold == 0)
> @@ -779,7 +802,6 @@ int del_mtd_device(struct mtd_info *mtd)
>  {
>  	int ret;
>  	struct mtd_notifier *not;
> -	struct device_node *mtd_of_node;
> 
>  	mutex_lock(&mtd_table_mutex);
> 
> @@ -793,28 +815,8 @@ int del_mtd_device(struct mtd_info *mtd)
>  	list_for_each_entry(not, &mtd_notifiers, list)
>  		not->remove(mtd);
> 
> -	if (mtd->usecount) {
> -		printk(KERN_NOTICE "Removing MTD device #%d (%s) with use
> count %d\n",
> -		       mtd->index, mtd->name, mtd->usecount);
> -		ret = -EBUSY;
> -	} else {
> -		mtd_of_node = mtd_get_of_node(mtd);
> -		debugfs_remove_recursive(mtd->dbg.dfs_dir);
> -
> -		/* Try to remove the NVMEM provider */
> -		nvmem_unregister(mtd->nvmem);
> -
> -		device_unregister(&mtd->dev);
> -
> -		/* Clear dev so mtd can be safely re-registered later if
> desired */
> -		memset(&mtd->dev, 0, sizeof(mtd->dev));
> -
> -		idr_remove(&mtd_idr, mtd->index);
> -		of_node_put(mtd_of_node);
> -
> -		module_put(THIS_MODULE);
> -		ret = 0;
> -	}
> +	kref_put(&mtd->refcnt, mtd_device_release);
> +	ret = 0;
> 
>  out_error:
>  	mutex_unlock(&mtd_table_mutex);
> @@ -1228,19 +1230,21 @@ int __get_mtd_device(struct mtd_info *mtd)
>  	if (!try_module_get(master->owner))
>  		return -ENODEV;
> 
> +	kref_get(&mtd->refcnt);
> +
>  	if (master->_get_device) {
>  		err = master->_get_device(mtd);
> 
>  		if (err) {
> +			kref_put(&mtd->refcnt, mtd_device_release);
>  			module_put(master->owner);
>  			return err;
>  		}
>  	}
> 
> -	master->usecount++;
> -
>  	while (mtd->parent) {
> -		mtd->usecount++;
> +		if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) || mtd-
> >parent != master)
> +			kref_get(&mtd->parent->refcnt);
>  		mtd = mtd->parent;
>  	}
> 
> @@ -1327,18 +1331,20 @@ void __put_mtd_device(struct mtd_info *mtd)
>  {
>  	struct mtd_info *master = mtd_get_master(mtd);
> 
> -	while (mtd->parent) {
> -		--mtd->usecount;
> -		BUG_ON(mtd->usecount < 0);
> -		mtd = mtd->parent;
> -	}
> +	while (mtd != master) {
> +		struct mtd_info *parent = mtd->parent;
> 
> -	master->usecount--;
> +		kref_put(&mtd->refcnt, mtd_device_release);
> +		mtd = parent;
> +	}
> 
>  	if (master->_put_device)
>  		master->_put_device(master);
> 
>  	module_put(master->owner);
> +
> +	if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER))
> +		kref_put(&master->refcnt, mtd_device_release);
>  }
>  EXPORT_SYMBOL_GPL(__put_mtd_device);
> 
> diff --git a/drivers/mtd/mtdcore.h b/drivers/mtd/mtdcore.h
> index b5eefeabf310..b014861a06a6 100644
> --- a/drivers/mtd/mtdcore.h
> +++ b/drivers/mtd/mtdcore.h
> @@ -12,6 +12,7 @@ int __must_check add_mtd_device(struct mtd_info
> *mtd);
>  int del_mtd_device(struct mtd_info *mtd);
>  int add_mtd_partitions(struct mtd_info *, const struct mtd_partition
> *, int);
>  int del_mtd_partitions(struct mtd_info *);
> +void release_mtd_partition(struct mtd_info *mtd);
> 
>  struct mtd_partitions;
> 
> diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
> index a46affbb037d..23483db8f30c 100644
> --- a/drivers/mtd/mtdpart.c
> +++ b/drivers/mtd/mtdpart.c
> @@ -32,6 +32,12 @@ static inline void free_partition(struct mtd_info
> *mtd)
>  	kfree(mtd);
>  }
> 
> +void release_mtd_partition(struct mtd_info *mtd)
> +{
> +	WARN_ON(!list_empty(&mtd->part.node));
> +	free_partition(mtd);
> +}
> +
>  static struct mtd_info *allocate_partition(struct mtd_info *parent,
>  					   const struct mtd_partition *part,
>  					   int partno, uint64_t cur_offset)
> @@ -309,13 +315,11 @@ static int __mtd_del_partition(struct mtd_info
> *mtd)
> 
>  	sysfs_remove_files(&mtd->dev.kobj, mtd_partition_attrs);
> 
> +	list_del_init(&mtd->part.node);
>  	err = del_mtd_device(mtd);
>  	if (err)
>  		return err;
> 
> -	list_del(&mtd->part.node);
> -	free_partition(mtd);
> -
>  	return 0;
>  }
> 
> @@ -333,6 +337,7 @@ static int __del_mtd_partitions(struct mtd_info
> *mtd)
>  			__del_mtd_partitions(child);
> 
>  		pr_info("Deleting %s MTD partition\n", child->name);
> +		list_del_init(&child->part.node);
>  		ret = del_mtd_device(child);
>  		if (ret < 0) {
>  			pr_err("Error when deleting partition \"%s\" (%d)\n",
> @@ -340,9 +345,6 @@ static int __del_mtd_partitions(struct mtd_info
> *mtd)
>  			err = ret;
>  			continue;
>  		}
> -
> -		list_del(&child->part.node);
> -		free_partition(child);
>  	}
> 
>  	return err;
> diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
> index 7c58c44662b8..914a9f974baa 100644
> --- a/include/linux/mtd/mtd.h
> +++ b/include/linux/mtd/mtd.h
> @@ -379,7 +379,7 @@ struct mtd_info {
> 
>  	struct module *owner;
>  	struct device dev;
> -	int usecount;
> +	struct kref refcnt;
>  	struct mtd_debug_info dbg;
>  	struct nvmem_device *nvmem;
>  	struct nvmem_device *otp_user_nvmem;
> --
> 2.34.1
  
Miquel Raynal July 15, 2023, 3:41 p.m. UTC | #3
Hi Fabrizio,

fabrizio.castro.jz@renesas.com wrote on Fri, 14 Jul 2023 16:10:45 +0000:

> Dear All,
> 
> I am sorry for reopening this topic, but as it turns out (after bisecting
> linux-next/master) this patch is interfering with a use case I am working
> on.
> 
> I am using a Renesas RZ/V2M EVK v2.0 platform, I have an SPI NOR memory
> ("micron,mt25ql256a") wired up to a connector on the platform, the SPI
> master is using driver (built as module):
> drivers/spi/spi-rzv2m-csi.c
> 
> Although the board device tree in mainline does not reflect the connection
> of CSI4 (which is the SPI master) from the SoC to the "micron,mt25ql256a"
> (SPI slave device), my local device tree comes with the necessary definitions.
> 
> Without this patch, when I load up the module, I get the below 3 devices:
> /dev/mtd0
> /dev/mtd0ro
> /dev/mtdblock0
> 
> They get cleaned up correctly upon module removal.
> I can reload the same module, and everything works just fine.
> 
> With this patch applied, when I load up the module, I get the same 3
> devices:
> /dev/mtd0
> /dev/mtd0ro
> /dev/mtdblock0
> 
> Upon removal, the below 2 devices still hang around:
> /dev/mtd0
> /dev/mtd0ro

Looks like the refcounting change is still not even in some cases, can
you investigate and come up with a proper patch? You can either improve
the existing patch or revert it and try your own approach if deemed
better.

Thanks,
Miquèl

> Preventing the module from being (re)loaded correctly:
> rzv2m_csi a4020200.spi: error -EBUSY: register controller failed
> rzv2m_csi: probe of a4020200.spi failed with error -16
> 
> Are you guys aware of this sort of side effect?
> 
> Thanks,
> Fab
> 
> > From: Alexander Usyskin <alexander.usyskin@intel.com>
> > Subject: [PATCH 1/2] mtd: use refcount to prevent corruption
> > 
> > From: Tomas Winkler <tomas.winkler@intel.com>
> > 
> > When underlying device is removed mtd core will crash
> > in case user space is holding open handle.
> > Need to use proper refcounting so device is release
> > only when has no users.
> > 
> > Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
> > Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
> > ---
> >  drivers/mtd/mtdcore.c   | 72 ++++++++++++++++++++++------------------
> > -
> >  drivers/mtd/mtdcore.h   |  1 +
> >  drivers/mtd/mtdpart.c   | 14 ++++----
> >  include/linux/mtd/mtd.h |  2 +-
> >  4 files changed, 49 insertions(+), 40 deletions(-)
> > 
> > diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
> > index abf4cb58a8ab..84bd1878367d 100644
> > --- a/drivers/mtd/mtdcore.c
> > +++ b/drivers/mtd/mtdcore.c
> > @@ -93,10 +93,33 @@ static void mtd_release(struct device *dev)
> >  	struct mtd_info *mtd = dev_get_drvdata(dev);
> >  	dev_t index = MTD_DEVT(mtd->index);
> > 
> > +	if (mtd_is_partition(mtd))
> > +		release_mtd_partition(mtd);
> > +
> >  	/* remove /dev/mtdXro node */
> >  	device_destroy(&mtd_class, index + 1);
> >  }
> > 
> > +static void mtd_device_release(struct kref *kref)
> > +{
> > +	struct mtd_info *mtd = container_of(kref, struct mtd_info,
> > refcnt);
> > +
> > +	debugfs_remove_recursive(mtd->dbg.dfs_dir);
> > +
> > +	/* Try to remove the NVMEM provider */
> > +	nvmem_unregister(mtd->nvmem);
> > +
> > +	device_unregister(&mtd->dev);
> > +
> > +	/* Clear dev so mtd can be safely re-registered later if desired
> > */
> > +	memset(&mtd->dev, 0, sizeof(mtd->dev));
> > +
> > +	idr_remove(&mtd_idr, mtd->index);
> > +	of_node_put(mtd_get_of_node(mtd));
> > +
> > +	module_put(THIS_MODULE);
> > +}
> > +
> >  #define MTD_DEVICE_ATTR_RO(name) \
> >  static DEVICE_ATTR(name, 0444, mtd_##name##_show, NULL)
> > 
> > @@ -666,7 +689,7 @@ int add_mtd_device(struct mtd_info *mtd)
> >  	}
> > 
> >  	mtd->index = i;
> > -	mtd->usecount = 0;
> > +	kref_init(&mtd->refcnt);
> > 
> >  	/* default value if not set by driver */
> >  	if (mtd->bitflip_threshold == 0)
> > @@ -779,7 +802,6 @@ int del_mtd_device(struct mtd_info *mtd)
> >  {
> >  	int ret;
> >  	struct mtd_notifier *not;
> > -	struct device_node *mtd_of_node;
> > 
> >  	mutex_lock(&mtd_table_mutex);
> > 
> > @@ -793,28 +815,8 @@ int del_mtd_device(struct mtd_info *mtd)
> >  	list_for_each_entry(not, &mtd_notifiers, list)
> >  		not->remove(mtd);
> > 
> > -	if (mtd->usecount) {
> > -		printk(KERN_NOTICE "Removing MTD device #%d (%s) with use
> > count %d\n",
> > -		       mtd->index, mtd->name, mtd->usecount);
> > -		ret = -EBUSY;
> > -	} else {
> > -		mtd_of_node = mtd_get_of_node(mtd);
> > -		debugfs_remove_recursive(mtd->dbg.dfs_dir);
> > -
> > -		/* Try to remove the NVMEM provider */
> > -		nvmem_unregister(mtd->nvmem);
> > -
> > -		device_unregister(&mtd->dev);
> > -
> > -		/* Clear dev so mtd can be safely re-registered later if
> > desired */
> > -		memset(&mtd->dev, 0, sizeof(mtd->dev));
> > -
> > -		idr_remove(&mtd_idr, mtd->index);
> > -		of_node_put(mtd_of_node);
> > -
> > -		module_put(THIS_MODULE);
> > -		ret = 0;
> > -	}
> > +	kref_put(&mtd->refcnt, mtd_device_release);
> > +	ret = 0;
> > 
> >  out_error:
> >  	mutex_unlock(&mtd_table_mutex);
> > @@ -1228,19 +1230,21 @@ int __get_mtd_device(struct mtd_info *mtd)
> >  	if (!try_module_get(master->owner))
> >  		return -ENODEV;
> > 
> > +	kref_get(&mtd->refcnt);
> > +
> >  	if (master->_get_device) {
> >  		err = master->_get_device(mtd);
> > 
> >  		if (err) {
> > +			kref_put(&mtd->refcnt, mtd_device_release);
> >  			module_put(master->owner);
> >  			return err;
> >  		}
> >  	}
> > 
> > -	master->usecount++;
> > -
> >  	while (mtd->parent) {
> > -		mtd->usecount++;
> > +		if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) || mtd-  
> > >parent != master)  
> > +			kref_get(&mtd->parent->refcnt);
> >  		mtd = mtd->parent;
> >  	}
> > 
> > @@ -1327,18 +1331,20 @@ void __put_mtd_device(struct mtd_info *mtd)
> >  {
> >  	struct mtd_info *master = mtd_get_master(mtd);
> > 
> > -	while (mtd->parent) {
> > -		--mtd->usecount;
> > -		BUG_ON(mtd->usecount < 0);
> > -		mtd = mtd->parent;
> > -	}
> > +	while (mtd != master) {
> > +		struct mtd_info *parent = mtd->parent;
> > 
> > -	master->usecount--;
> > +		kref_put(&mtd->refcnt, mtd_device_release);
> > +		mtd = parent;
> > +	}
> > 
> >  	if (master->_put_device)
> >  		master->_put_device(master);
> > 
> >  	module_put(master->owner);
> > +
> > +	if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER))
> > +		kref_put(&master->refcnt, mtd_device_release);
> >  }
> >  EXPORT_SYMBOL_GPL(__put_mtd_device);
> > 
> > diff --git a/drivers/mtd/mtdcore.h b/drivers/mtd/mtdcore.h
> > index b5eefeabf310..b014861a06a6 100644
> > --- a/drivers/mtd/mtdcore.h
> > +++ b/drivers/mtd/mtdcore.h
> > @@ -12,6 +12,7 @@ int __must_check add_mtd_device(struct mtd_info
> > *mtd);
> >  int del_mtd_device(struct mtd_info *mtd);
> >  int add_mtd_partitions(struct mtd_info *, const struct mtd_partition
> > *, int);
> >  int del_mtd_partitions(struct mtd_info *);
> > +void release_mtd_partition(struct mtd_info *mtd);
> > 
> >  struct mtd_partitions;
> > 
> > diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
> > index a46affbb037d..23483db8f30c 100644
> > --- a/drivers/mtd/mtdpart.c
> > +++ b/drivers/mtd/mtdpart.c
> > @@ -32,6 +32,12 @@ static inline void free_partition(struct mtd_info
> > *mtd)
> >  	kfree(mtd);
> >  }
> > 
> > +void release_mtd_partition(struct mtd_info *mtd)
> > +{
> > +	WARN_ON(!list_empty(&mtd->part.node));
> > +	free_partition(mtd);
> > +}
> > +
> >  static struct mtd_info *allocate_partition(struct mtd_info *parent,
> >  					   const struct mtd_partition *part,
> >  					   int partno, uint64_t cur_offset)
> > @@ -309,13 +315,11 @@ static int __mtd_del_partition(struct mtd_info
> > *mtd)
> > 
> >  	sysfs_remove_files(&mtd->dev.kobj, mtd_partition_attrs);
> > 
> > +	list_del_init(&mtd->part.node);
> >  	err = del_mtd_device(mtd);
> >  	if (err)
> >  		return err;
> > 
> > -	list_del(&mtd->part.node);
> > -	free_partition(mtd);
> > -
> >  	return 0;
> >  }
> > 
> > @@ -333,6 +337,7 @@ static int __del_mtd_partitions(struct mtd_info
> > *mtd)
> >  			__del_mtd_partitions(child);
> > 
> >  		pr_info("Deleting %s MTD partition\n", child->name);
> > +		list_del_init(&child->part.node);
> >  		ret = del_mtd_device(child);
> >  		if (ret < 0) {
> >  			pr_err("Error when deleting partition \"%s\" (%d)\n",
> > @@ -340,9 +345,6 @@ static int __del_mtd_partitions(struct mtd_info
> > *mtd)
> >  			err = ret;
> >  			continue;
> >  		}
> > -
> > -		list_del(&child->part.node);
> > -		free_partition(child);
> >  	}
> > 
> >  	return err;
> > diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
> > index 7c58c44662b8..914a9f974baa 100644
> > --- a/include/linux/mtd/mtd.h
> > +++ b/include/linux/mtd/mtd.h
> > @@ -379,7 +379,7 @@ struct mtd_info {
> > 
> >  	struct module *owner;
> >  	struct device dev;
> > -	int usecount;
> > +	struct kref refcnt;
> >  	struct mtd_debug_info dbg;
> >  	struct nvmem_device *nvmem;
> >  	struct nvmem_device *otp_user_nvmem;
> > --
> > 2.34.1  
>
  
Usyskin, Alexander July 16, 2023, 6:29 a.m. UTC | #4
> > With this patch applied, when I load up the module, I get the same 3
> > devices:
> > /dev/mtd0
> > /dev/mtd0ro
> > /dev/mtdblock0
> >
> > Upon removal, the below 2 devices still hang around:
> > /dev/mtd0
> > /dev/mtd0ro
> 
Our use-case do not produce mtdblock, maybe there are some imbalances of get/put?
I have somewhere version with pr_debug after every kref_get/put. That may help to catch where
it missed, I hope.

--
Alexander (Sasha) Usyskin

CSE FW Dev - Host SW
Intel Israel (74) Limited
  
Miquel Raynal July 16, 2023, 1:39 p.m. UTC | #5
Hi Alexander,

alexander.usyskin@intel.com wrote on Sun, 16 Jul 2023 06:29:02 +0000:

> > > With this patch applied, when I load up the module, I get the same 3
> > > devices:
> > > /dev/mtd0
> > > /dev/mtd0ro
> > > /dev/mtdblock0
> > >
> > > Upon removal, the below 2 devices still hang around:
> > > /dev/mtd0
> > > /dev/mtd0ro  
> >   
> Our use-case do not produce mtdblock, maybe there are some imbalances of get/put?
> I have somewhere version with pr_debug after every kref_get/put. That may help to catch where
> it missed, I hope.

I believe mtdblock is the good citizen here. Just disable
CONFIG_MTD_BLOCK from your configuration and you will likely observe
the same issue, just a bit narrowed, perhaps. Indeed, if you manage to
follow all the get/put calls it can help to find an imbalance.

Thanks,
Miquèl
  
Usyskin, Alexander July 24, 2023, 11:43 a.m. UTC | #6
> > > > With this patch applied, when I load up the module, I get the same 3
> > > > devices:
> > > > /dev/mtd0
> > > > /dev/mtd0ro
> > > > /dev/mtdblock0
> > > >
> > > > Upon removal, the below 2 devices still hang around:
> > > > /dev/mtd0
> > > > /dev/mtd0ro
> > >
> > Our use-case do not produce mtdblock, maybe there are some imbalances
> of get/put?
> > I have somewhere version with pr_debug after every kref_get/put. That may
> help to catch where
> > it missed, I hope.
> 
> I believe mtdblock is the good citizen here. Just disable
> CONFIG_MTD_BLOCK from your configuration and you will likely observe
> the same issue, just a bit narrowed, perhaps. Indeed, if you manage to
> follow all the get/put calls it can help to find an imbalance.
> 
> Thanks,
> Miquèl

Miquel, do you have CONFIG_MTD_PARTITIONED_MASTER set in your config?

-- 
Thanks,
Sasha
  
Miquel Raynal July 24, 2023, 11:51 a.m. UTC | #7
Hi Alexander,

alexander.usyskin@intel.com wrote on Mon, 24 Jul 2023 11:43:59 +0000:

> > > > > With this patch applied, when I load up the module, I get the same 3
> > > > > devices:
> > > > > /dev/mtd0
> > > > > /dev/mtd0ro
> > > > > /dev/mtdblock0
> > > > >
> > > > > Upon removal, the below 2 devices still hang around:
> > > > > /dev/mtd0
> > > > > /dev/mtd0ro  
> > > >  
> > > Our use-case do not produce mtdblock, maybe there are some imbalances  
> > of get/put?  
> > > I have somewhere version with pr_debug after every kref_get/put. That may  
> > help to catch where  
> > > it missed, I hope.  
> > 
> > I believe mtdblock is the good citizen here. Just disable
> > CONFIG_MTD_BLOCK from your configuration and you will likely observe
> > the same issue, just a bit narrowed, perhaps. Indeed, if you manage to
> > follow all the get/put calls it can help to find an imbalance.
> > 
> > Thanks,
> > Miquèl  
> 
> Miquel, do you have CONFIG_MTD_PARTITIONED_MASTER set in your config?

Not sure I get your question. You can enable or disable it, it should
work in both cases (yet, the handling is of course a bit different as
the top level device will be retained/not retained).

Thanks,
Miquèl
  
Usyskin, Alexander July 24, 2023, 12:04 p.m. UTC | #8
Hi Miquel,
> 
> Hi Alexander,
> 
> alexander.usyskin@intel.com wrote on Mon, 24 Jul 2023 11:43:59 +0000:
> 
> > > > > > With this patch applied, when I load up the module, I get the same 3
> > > > > > devices:
> > > > > > /dev/mtd0
> > > > > > /dev/mtd0ro
> > > > > > /dev/mtdblock0
> > > > > >
> > > > > > Upon removal, the below 2 devices still hang around:
> > > > > > /dev/mtd0
> > > > > > /dev/mtd0ro
> > > > >
> > > > Our use-case do not produce mtdblock, maybe there are some
> imbalances
> > > of get/put?
> > > > I have somewhere version with pr_debug after every kref_get/put. That
> may
> > > help to catch where
> > > > it missed, I hope.
> > >
> > > I believe mtdblock is the good citizen here. Just disable
> > > CONFIG_MTD_BLOCK from your configuration and you will likely observe
> > > the same issue, just a bit narrowed, perhaps. Indeed, if you manage to
> > > follow all the get/put calls it can help to find an imbalance.
> > >
> > > Thanks,
> > > Miquèl
> >
> > Miquel, do you have CONFIG_MTD_PARTITIONED_MASTER set in your
> config?
> 
> Not sure I get your question. You can enable or disable it, it should
> work in both cases (yet, the handling is of course a bit different as
> the top level device will be retained/not retained).
> 
> Thanks,
> Miquèl

I'm trying to understand why I can't reproduce the problem in my scenario.
I found an important difference in upstreamed patch and internal version:
The IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) check around kref_get/put does not exists in the internal tree.
The code before my patch do not have such check, so I tend to assume that this check should be removed.
If you reproduce happens with CONFIG_MTD_PARTITIONED_MASTER disabled that may explain problems that you see.

-- 
Thanks,
Sasha
  
Usyskin, Alexander July 25, 2023, 12:50 p.m. UTC | #9
Hi
> 
> Hi Miquel,
> >
> > Hi Alexander,
> >
> > alexander.usyskin@intel.com wrote on Mon, 24 Jul 2023 11:43:59 +0000:
> >
> > > > > > > With this patch applied, when I load up the module, I get the same 3
> > > > > > > devices:
> > > > > > > /dev/mtd0
> > > > > > > /dev/mtd0ro
> > > > > > > /dev/mtdblock0
> > > > > > >
> > > > > > > Upon removal, the below 2 devices still hang around:
> > > > > > > /dev/mtd0
> > > > > > > /dev/mtd0ro
> > > > > >
> > > > > Our use-case do not produce mtdblock, maybe there are some
> > imbalances
> > > > of get/put?
> > > > > I have somewhere version with pr_debug after every kref_get/put. That
> > may
> > > > help to catch where
> > > > > it missed, I hope.
> > > >
> > > > I believe mtdblock is the good citizen here. Just disable
> > > > CONFIG_MTD_BLOCK from your configuration and you will likely observe
> > > > the same issue, just a bit narrowed, perhaps. Indeed, if you manage to
> > > > follow all the get/put calls it can help to find an imbalance.
> > > >
> > > > Thanks,
> > > > Miquèl
> > >
> > > Miquel, do you have CONFIG_MTD_PARTITIONED_MASTER set in your
> > config?
> >
> > Not sure I get your question. You can enable or disable it, it should
> > work in both cases (yet, the handling is of course a bit different as
> > the top level device will be retained/not retained).
> >
> > Thanks,
> > Miquèl
> 
> I'm trying to understand why I can't reproduce the problem in my scenario.
> I found an important difference in upstreamed patch and internal version:
> The IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) check around
> kref_get/put does not exists in the internal tree.
> The code before my patch do not have such check, so I tend to assume that
> this check should be removed.
> If you reproduce happens with CONFIG_MTD_PARTITIONED_MASTER
> disabled that may explain problems that you see.
> 
> --
> Thanks,
> Sasha
> 

I've tried to reproduce this with latest Linux 6.5-rc1 and my two patches.
The manual modprobe mtdblock creates mtdblock0 over my partitions too.
I can't reproduce problem neither with MTD_PARTITIONED_MASTER nor without.

Let's try to debug on your system, can you enable dynamic debug for mtd subsystem,
reproduce and publish dmesg?

The prints for kref get/put can be added as below:

diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 2466ea466466..374835831428 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -1242,10 +1242,13 @@ int __get_mtd_device(struct mtd_info *mtd)
        }

        kref_get(&mtd->refcnt);
+       pr_debug("get mtd %s %d\n", mtd->name, kref_read(&mtd->refcnt));

        while (mtd->parent) {
-               if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) || mtd->parent != master)
+               if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) || mtd->parent != master) {
                        kref_get(&mtd->parent->refcnt);
+                       pr_debug("get mtd %s %d\n", mtd->parent->name, kref_read(&mtd->parent->refcnt));
+               }
                mtd = mtd->parent;
        }

@@ -1335,12 +1338,15 @@ void __put_mtd_device(struct mtd_info *mtd)
        while (mtd != master) {
                struct mtd_info *parent = mtd->parent;

+               pr_debug("put mtd %s %d\n", mtd->name, kref_read(&mtd->refcnt));
                kref_put(&mtd->refcnt, mtd_device_release);
                mtd = parent;
        }

-       if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER))
+       if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) {
+               pr_debug("put mtd %s %d\n", master->name, kref_read(&master->refcnt));
                kref_put(&master->refcnt, mtd_device_release);
+       }

        module_put(master->owner);


-- 
Thanks,
Sasha
  
Miquel Raynal July 27, 2023, 6:19 a.m. UTC | #10
Hi Alexander,

alexander.usyskin@intel.com wrote on Tue, 25 Jul 2023 12:50:04 +0000:

> Hi
> > 
> > Hi Miquel,  
> > >
> > > Hi Alexander,
> > >
> > > alexander.usyskin@intel.com wrote on Mon, 24 Jul 2023 11:43:59 +0000:
> > >  
> > > > > > > > With this patch applied, when I load up the module, I get the same 3
> > > > > > > > devices:
> > > > > > > > /dev/mtd0
> > > > > > > > /dev/mtd0ro
> > > > > > > > /dev/mtdblock0
> > > > > > > >
> > > > > > > > Upon removal, the below 2 devices still hang around:
> > > > > > > > /dev/mtd0
> > > > > > > > /dev/mtd0ro  
> > > > > > >  
> > > > > > Our use-case do not produce mtdblock, maybe there are some  
> > > imbalances  
> > > > > of get/put?  
> > > > > > I have somewhere version with pr_debug after every kref_get/put. That  
> > > may  
> > > > > help to catch where  
> > > > > > it missed, I hope.  
> > > > >
> > > > > I believe mtdblock is the good citizen here. Just disable
> > > > > CONFIG_MTD_BLOCK from your configuration and you will likely observe
> > > > > the same issue, just a bit narrowed, perhaps. Indeed, if you manage to
> > > > > follow all the get/put calls it can help to find an imbalance.
> > > > >
> > > > > Thanks,
> > > > > Miquèl  
> > > >
> > > > Miquel, do you have CONFIG_MTD_PARTITIONED_MASTER set in your  
> > > config?
> > >
> > > Not sure I get your question. You can enable or disable it, it should
> > > work in both cases (yet, the handling is of course a bit different as
> > > the top level device will be retained/not retained).
> > >
> > > Thanks,
> > > Miquèl  
> > 
> > I'm trying to understand why I can't reproduce the problem in my scenario.
> > I found an important difference in upstreamed patch and internal version:
> > The IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) check around
> > kref_get/put does not exists in the internal tree.
> > The code before my patch do not have such check, so I tend to assume that
> > this check should be removed.
> > If you reproduce happens with CONFIG_MTD_PARTITIONED_MASTER
> > disabled that may explain problems that you see.
> > 
> > --
> > Thanks,
> > Sasha
> >   
> 
> I've tried to reproduce this with latest Linux 6.5-rc1 and my two patches.
> The manual modprobe mtdblock creates mtdblock0 over my partitions too.
> I can't reproduce problem neither with MTD_PARTITIONED_MASTER nor without.
> 
> Let's try to debug on your system, can you enable dynamic debug for mtd subsystem,
> reproduce and publish dmesg?
> 
> The prints for kref get/put can be added as below:
> 
> diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
> index 2466ea466466..374835831428 100644
> --- a/drivers/mtd/mtdcore.c
> +++ b/drivers/mtd/mtdcore.c
> @@ -1242,10 +1242,13 @@ int __get_mtd_device(struct mtd_info *mtd)
>         }
> 
>         kref_get(&mtd->refcnt);
> +       pr_debug("get mtd %s %d\n", mtd->name, kref_read(&mtd->refcnt));
> 
>         while (mtd->parent) {
> -               if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) || mtd->parent != master)
> +               if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) || mtd->parent != master) {
>                         kref_get(&mtd->parent->refcnt);
> +                       pr_debug("get mtd %s %d\n", mtd->parent->name, kref_read(&mtd->parent->refcnt));
> +               }
>                 mtd = mtd->parent;
>         }
> 
> @@ -1335,12 +1338,15 @@ void __put_mtd_device(struct mtd_info *mtd)
>         while (mtd != master) {
>                 struct mtd_info *parent = mtd->parent;
> 
> +               pr_debug("put mtd %s %d\n", mtd->name, kref_read(&mtd->refcnt));
>                 kref_put(&mtd->refcnt, mtd_device_release);
>                 mtd = parent;
>         }
> 
> -       if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER))
> +       if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) {
> +               pr_debug("put mtd %s %d\n", master->name, kref_read(&master->refcnt));
>                 kref_put(&master->refcnt, mtd_device_release);
> +       }
> 
>         module_put(master->owner);
> 
> 

Could this be helpful?

https://lore.kernel.org/all/20230725215539.3135304-1-zhangxiaoxu5@huawei.com/

If you successfully test it, please send your Tested-by.

Thanks,
Miquèl
  
Tomas Winkler July 27, 2023, 6:32 a.m. UTC | #11
> 
> Hi Alexander,
> 
> alexander.usyskin@intel.com wrote on Tue, 25 Jul 2023 12:50:04 +0000:
> 
> > Hi
> > >
> > > Hi Miquel,
> > > >
> > > > Hi Alexander,
> > > >
> > > > alexander.usyskin@intel.com wrote on Mon, 24 Jul 2023 11:43:59
> +0000:
> > > >
> > > > > > > > > With this patch applied, when I load up the module, I
> > > > > > > > > get the same 3
> > > > > > > > > devices:
> > > > > > > > > /dev/mtd0
> > > > > > > > > /dev/mtd0ro
> > > > > > > > > /dev/mtdblock0
> > > > > > > > >
> > > > > > > > > Upon removal, the below 2 devices still hang around:
> > > > > > > > > /dev/mtd0
> > > > > > > > > /dev/mtd0ro
> > > > > > > >
> > > > > > > Our use-case do not produce mtdblock, maybe there are some
> > > > imbalances
> > > > > > of get/put?
> > > > > > > I have somewhere version with pr_debug after every
> > > > > > > kref_get/put. That
> > > > may
> > > > > > help to catch where
> > > > > > > it missed, I hope.
> > > > > >
> > > > > > I believe mtdblock is the good citizen here. Just disable
> > > > > > CONFIG_MTD_BLOCK from your configuration and you will likely
> > > > > > observe the same issue, just a bit narrowed, perhaps. Indeed,
> > > > > > if you manage to follow all the get/put calls it can help to find an
> imbalance.
> > > > > >
> > > > > > Thanks,
> > > > > > Miquèl
> > > > >
> > > > > Miquel, do you have CONFIG_MTD_PARTITIONED_MASTER set in your
> > > > config?
> > > >
> > > > Not sure I get your question. You can enable or disable it, it
> > > > should work in both cases (yet, the handling is of course a bit
> > > > different as the top level device will be retained/not retained).
> > > >
> > > > Thanks,
> > > > Miquèl
> > >
> > > I'm trying to understand why I can't reproduce the problem in my
> scenario.
> > > I found an important difference in upstreamed patch and internal version:
> > > The IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) check around
> > > kref_get/put does not exists in the internal tree.
> > > The code before my patch do not have such check, so I tend to assume
> > > that this check should be removed.
> > > If you reproduce happens with CONFIG_MTD_PARTITIONED_MASTER
> disabled
> > > that may explain problems that you see.
> > >
> > > --
> > > Thanks,
> > > Sasha
> > >
> >
> > I've tried to reproduce this with latest Linux 6.5-rc1 and my two patches.
> > The manual modprobe mtdblock creates mtdblock0 over my partitions too.
> > I can't reproduce problem neither with MTD_PARTITIONED_MASTER nor
> without.
> >
> > Let's try to debug on your system, can you enable dynamic debug for
> > mtd subsystem, reproduce and publish dmesg?
> >
> > The prints for kref get/put can be added as below:
> >
> > diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index
> > 2466ea466466..374835831428 100644
> > --- a/drivers/mtd/mtdcore.c
> > +++ b/drivers/mtd/mtdcore.c
> > @@ -1242,10 +1242,13 @@ int __get_mtd_device(struct mtd_info *mtd)
> >         }
> >
> >         kref_get(&mtd->refcnt);
> > +       pr_debug("get mtd %s %d\n", mtd->name,
> > + kref_read(&mtd->refcnt));
> >
> >         while (mtd->parent) {
> > -               if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) || mtd-
> >parent != master)
> > +               if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) ||
> > + mtd->parent != master) {
> >                         kref_get(&mtd->parent->refcnt);
> > +                       pr_debug("get mtd %s %d\n", mtd->parent->name,
> kref_read(&mtd->parent->refcnt));
> > +               }
> >                 mtd = mtd->parent;
> >         }
> >
> > @@ -1335,12 +1338,15 @@ void __put_mtd_device(struct mtd_info
> *mtd)
> >         while (mtd != master) {
> >                 struct mtd_info *parent = mtd->parent;
> >
> > +               pr_debug("put mtd %s %d\n", mtd->name,
> > + kref_read(&mtd->refcnt));
> >                 kref_put(&mtd->refcnt, mtd_device_release);
> >                 mtd = parent;
> >         }
> >
> > -       if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER))
> > +       if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) {
> > +               pr_debug("put mtd %s %d\n", master->name,
> > + kref_read(&master->refcnt));
> >                 kref_put(&master->refcnt, mtd_device_release);
> > +       }
> >
> >         module_put(master->owner);
> >
> >
> 
> Could this be helpful?
> 
> https://lore.kernel.org/all/20230725215539.3135304-1-
> zhangxiaoxu5@huawei.com/
> 
> If you successfully test it, please send your Tested-by.
In the first glance it doesn't look correct, we have the reproduced using kasan, so hopefully the fix will follow, shortly. 
Thanks
Tomas
  
Miquel Raynal July 27, 2023, 6:55 a.m. UTC | #12
Hi Tomas,

tomas.winkler@intel.com wrote on Thu, 27 Jul 2023 06:32:39 +0000:

> > 
> > Hi Alexander,
> > 
> > alexander.usyskin@intel.com wrote on Tue, 25 Jul 2023 12:50:04 +0000:
> > 
> > > Hi
> > > >
> > > > Hi Miquel,
> > > > >
> > > > > Hi Alexander,
> > > > >
> > > > > alexander.usyskin@intel.com wrote on Mon, 24 Jul 2023 11:43:59
> > +0000:
> > > > >
> > > > > > > > > > With this patch applied, when I load up the module, I
> > > > > > > > > > get the same 3
> > > > > > > > > > devices:
> > > > > > > > > > /dev/mtd0
> > > > > > > > > > /dev/mtd0ro
> > > > > > > > > > /dev/mtdblock0
> > > > > > > > > >
> > > > > > > > > > Upon removal, the below 2 devices still hang around:
> > > > > > > > > > /dev/mtd0
> > > > > > > > > > /dev/mtd0ro
> > > > > > > > >
> > > > > > > > Our use-case do not produce mtdblock, maybe there are some
> > > > > imbalances
> > > > > > > of get/put?
> > > > > > > > I have somewhere version with pr_debug after every
> > > > > > > > kref_get/put. That
> > > > > may
> > > > > > > help to catch where
> > > > > > > > it missed, I hope.
> > > > > > >
> > > > > > > I believe mtdblock is the good citizen here. Just disable
> > > > > > > CONFIG_MTD_BLOCK from your configuration and you will likely
> > > > > > > observe the same issue, just a bit narrowed, perhaps. Indeed,
> > > > > > > if you manage to follow all the get/put calls it can help to find an
> > imbalance.
> > > > > > >
> > > > > > > Thanks,
> > > > > > > Miquèl
> > > > > >
> > > > > > Miquel, do you have CONFIG_MTD_PARTITIONED_MASTER set in your
> > > > > config?
> > > > >
> > > > > Not sure I get your question. You can enable or disable it, it
> > > > > should work in both cases (yet, the handling is of course a bit
> > > > > different as the top level device will be retained/not retained).
> > > > >
> > > > > Thanks,
> > > > > Miquèl
> > > >
> > > > I'm trying to understand why I can't reproduce the problem in my
> > scenario.
> > > > I found an important difference in upstreamed patch and internal version:
> > > > The IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) check around
> > > > kref_get/put does not exists in the internal tree.
> > > > The code before my patch do not have such check, so I tend to assume
> > > > that this check should be removed.
> > > > If you reproduce happens with CONFIG_MTD_PARTITIONED_MASTER
> > disabled
> > > > that may explain problems that you see.
> > > >
> > > > --
> > > > Thanks,
> > > > Sasha
> > > >
> > >
> > > I've tried to reproduce this with latest Linux 6.5-rc1 and my two patches.
> > > The manual modprobe mtdblock creates mtdblock0 over my partitions too.
> > > I can't reproduce problem neither with MTD_PARTITIONED_MASTER nor
> > without.
> > >
> > > Let's try to debug on your system, can you enable dynamic debug for
> > > mtd subsystem, reproduce and publish dmesg?
> > >
> > > The prints for kref get/put can be added as below:
> > >
> > > diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index
> > > 2466ea466466..374835831428 100644
> > > --- a/drivers/mtd/mtdcore.c
> > > +++ b/drivers/mtd/mtdcore.c
> > > @@ -1242,10 +1242,13 @@ int __get_mtd_device(struct mtd_info *mtd)
> > >         }
> > >
> > >         kref_get(&mtd->refcnt);
> > > +       pr_debug("get mtd %s %d\n", mtd->name,
> > > + kref_read(&mtd->refcnt));
> > >
> > >         while (mtd->parent) {
> > > -               if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) || mtd-
> > >parent != master)
> > > +               if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) ||
> > > + mtd->parent != master) {
> > >                         kref_get(&mtd->parent->refcnt);
> > > +                       pr_debug("get mtd %s %d\n", mtd->parent->name,
> > kref_read(&mtd->parent->refcnt));
> > > +               }
> > >                 mtd = mtd->parent;
> > >         }
> > >
> > > @@ -1335,12 +1338,15 @@ void __put_mtd_device(struct mtd_info
> > *mtd)
> > >         while (mtd != master) {
> > >                 struct mtd_info *parent = mtd->parent;
> > >
> > > +               pr_debug("put mtd %s %d\n", mtd->name,
> > > + kref_read(&mtd->refcnt));
> > >                 kref_put(&mtd->refcnt, mtd_device_release);
> > >                 mtd = parent;
> > >         }
> > >
> > > -       if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER))
> > > +       if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) {
> > > +               pr_debug("put mtd %s %d\n", master->name,
> > > + kref_read(&master->refcnt));
> > >                 kref_put(&master->refcnt, mtd_device_release);
> > > +       }
> > >
> > >         module_put(master->owner);
> > >
> > >
> > 
> > Could this be helpful?
> > 
> > https://lore.kernel.org/all/20230725215539.3135304-1-
> > zhangxiaoxu5@huawei.com/
> > 
> > If you successfully test it, please send your Tested-by.
> In the first glance it doesn't look correct, we have the reproduced using kasan, so hopefully the fix will follow, shortly. 
> Thanks
> Tomas
> 

Why is this fix not correct?

Are you currently writing a fix yourself?

Thanks,
Miquèl
  

Patch

diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index abf4cb58a8ab..84bd1878367d 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -93,10 +93,33 @@  static void mtd_release(struct device *dev)
 	struct mtd_info *mtd = dev_get_drvdata(dev);
 	dev_t index = MTD_DEVT(mtd->index);
 
+	if (mtd_is_partition(mtd))
+		release_mtd_partition(mtd);
+
 	/* remove /dev/mtdXro node */
 	device_destroy(&mtd_class, index + 1);
 }
 
+static void mtd_device_release(struct kref *kref)
+{
+	struct mtd_info *mtd = container_of(kref, struct mtd_info, refcnt);
+
+	debugfs_remove_recursive(mtd->dbg.dfs_dir);
+
+	/* Try to remove the NVMEM provider */
+	nvmem_unregister(mtd->nvmem);
+
+	device_unregister(&mtd->dev);
+
+	/* Clear dev so mtd can be safely re-registered later if desired */
+	memset(&mtd->dev, 0, sizeof(mtd->dev));
+
+	idr_remove(&mtd_idr, mtd->index);
+	of_node_put(mtd_get_of_node(mtd));
+
+	module_put(THIS_MODULE);
+}
+
 #define MTD_DEVICE_ATTR_RO(name) \
 static DEVICE_ATTR(name, 0444, mtd_##name##_show, NULL)
 
@@ -666,7 +689,7 @@  int add_mtd_device(struct mtd_info *mtd)
 	}
 
 	mtd->index = i;
-	mtd->usecount = 0;
+	kref_init(&mtd->refcnt);
 
 	/* default value if not set by driver */
 	if (mtd->bitflip_threshold == 0)
@@ -779,7 +802,6 @@  int del_mtd_device(struct mtd_info *mtd)
 {
 	int ret;
 	struct mtd_notifier *not;
-	struct device_node *mtd_of_node;
 
 	mutex_lock(&mtd_table_mutex);
 
@@ -793,28 +815,8 @@  int del_mtd_device(struct mtd_info *mtd)
 	list_for_each_entry(not, &mtd_notifiers, list)
 		not->remove(mtd);
 
-	if (mtd->usecount) {
-		printk(KERN_NOTICE "Removing MTD device #%d (%s) with use count %d\n",
-		       mtd->index, mtd->name, mtd->usecount);
-		ret = -EBUSY;
-	} else {
-		mtd_of_node = mtd_get_of_node(mtd);
-		debugfs_remove_recursive(mtd->dbg.dfs_dir);
-
-		/* Try to remove the NVMEM provider */
-		nvmem_unregister(mtd->nvmem);
-
-		device_unregister(&mtd->dev);
-
-		/* Clear dev so mtd can be safely re-registered later if desired */
-		memset(&mtd->dev, 0, sizeof(mtd->dev));
-
-		idr_remove(&mtd_idr, mtd->index);
-		of_node_put(mtd_of_node);
-
-		module_put(THIS_MODULE);
-		ret = 0;
-	}
+	kref_put(&mtd->refcnt, mtd_device_release);
+	ret = 0;
 
 out_error:
 	mutex_unlock(&mtd_table_mutex);
@@ -1228,19 +1230,21 @@  int __get_mtd_device(struct mtd_info *mtd)
 	if (!try_module_get(master->owner))
 		return -ENODEV;
 
+	kref_get(&mtd->refcnt);
+
 	if (master->_get_device) {
 		err = master->_get_device(mtd);
 
 		if (err) {
+			kref_put(&mtd->refcnt, mtd_device_release);
 			module_put(master->owner);
 			return err;
 		}
 	}
 
-	master->usecount++;
-
 	while (mtd->parent) {
-		mtd->usecount++;
+		if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) || mtd->parent != master)
+			kref_get(&mtd->parent->refcnt);
 		mtd = mtd->parent;
 	}
 
@@ -1327,18 +1331,20 @@  void __put_mtd_device(struct mtd_info *mtd)
 {
 	struct mtd_info *master = mtd_get_master(mtd);
 
-	while (mtd->parent) {
-		--mtd->usecount;
-		BUG_ON(mtd->usecount < 0);
-		mtd = mtd->parent;
-	}
+	while (mtd != master) {
+		struct mtd_info *parent = mtd->parent;
 
-	master->usecount--;
+		kref_put(&mtd->refcnt, mtd_device_release);
+		mtd = parent;
+	}
 
 	if (master->_put_device)
 		master->_put_device(master);
 
 	module_put(master->owner);
+
+	if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER))
+		kref_put(&master->refcnt, mtd_device_release);
 }
 EXPORT_SYMBOL_GPL(__put_mtd_device);
 
diff --git a/drivers/mtd/mtdcore.h b/drivers/mtd/mtdcore.h
index b5eefeabf310..b014861a06a6 100644
--- a/drivers/mtd/mtdcore.h
+++ b/drivers/mtd/mtdcore.h
@@ -12,6 +12,7 @@  int __must_check add_mtd_device(struct mtd_info *mtd);
 int del_mtd_device(struct mtd_info *mtd);
 int add_mtd_partitions(struct mtd_info *, const struct mtd_partition *, int);
 int del_mtd_partitions(struct mtd_info *);
+void release_mtd_partition(struct mtd_info *mtd);
 
 struct mtd_partitions;
 
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index a46affbb037d..23483db8f30c 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -32,6 +32,12 @@  static inline void free_partition(struct mtd_info *mtd)
 	kfree(mtd);
 }
 
+void release_mtd_partition(struct mtd_info *mtd)
+{
+	WARN_ON(!list_empty(&mtd->part.node));
+	free_partition(mtd);
+}
+
 static struct mtd_info *allocate_partition(struct mtd_info *parent,
 					   const struct mtd_partition *part,
 					   int partno, uint64_t cur_offset)
@@ -309,13 +315,11 @@  static int __mtd_del_partition(struct mtd_info *mtd)
 
 	sysfs_remove_files(&mtd->dev.kobj, mtd_partition_attrs);
 
+	list_del_init(&mtd->part.node);
 	err = del_mtd_device(mtd);
 	if (err)
 		return err;
 
-	list_del(&mtd->part.node);
-	free_partition(mtd);
-
 	return 0;
 }
 
@@ -333,6 +337,7 @@  static int __del_mtd_partitions(struct mtd_info *mtd)
 			__del_mtd_partitions(child);
 
 		pr_info("Deleting %s MTD partition\n", child->name);
+		list_del_init(&child->part.node);
 		ret = del_mtd_device(child);
 		if (ret < 0) {
 			pr_err("Error when deleting partition \"%s\" (%d)\n",
@@ -340,9 +345,6 @@  static int __del_mtd_partitions(struct mtd_info *mtd)
 			err = ret;
 			continue;
 		}
-
-		list_del(&child->part.node);
-		free_partition(child);
 	}
 
 	return err;
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 7c58c44662b8..914a9f974baa 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -379,7 +379,7 @@  struct mtd_info {
 
 	struct module *owner;
 	struct device dev;
-	int usecount;
+	struct kref refcnt;
 	struct mtd_debug_info dbg;
 	struct nvmem_device *nvmem;
 	struct nvmem_device *otp_user_nvmem;