[v2] nvmem: fix registration vs use race

Message ID E1pCiUe-00457v-SQ@rmk-PC.armlinux.org.uk
State New
Headers
Series [v2] nvmem: fix registration vs use race |

Commit Message

Russell King (Oracle) Jan. 3, 2023, 2:42 p.m. UTC
  The i.MX6 CPU frequency driver sometimes fails to register at boot time
due to nvmem_cell_read_u32() sporadically returning -ENOENT.

This happens because there is a window where __nvmem_device_get() in
of_nvmem_cell_get() is able to return the nvmem device, but as cells
have not yet been set up, nvmem_find_cell_entry_by_node() returns NULL.

This occurs because the nvmem core registration code violates one of the
fundamental principles of kernel programming: do not publish data
structures before their setup is complete.

Fix this by making nvmem core code conform with this principle.

Fixes: eace75cfdcf7 ("nvmem: Add a simple NVMEM framework for nvmem providers")
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
v2: add fixes tag

 drivers/nvmem/core.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)
  

Comments

Hector Martin Jan. 3, 2023, 3:20 p.m. UTC | #1
On 03/01/2023 23.42, Russell King (Oracle) wrote:
> The i.MX6 CPU frequency driver sometimes fails to register at boot time
> due to nvmem_cell_read_u32() sporadically returning -ENOENT.
> 
> This happens because there is a window where __nvmem_device_get() in
> of_nvmem_cell_get() is able to return the nvmem device, but as cells
> have been setup, nvmem_find_cell_entry_by_node() returns NULL.
> 
> The occurs because the nvmem core registration code violates one of the
> fundamental principles of kernel programming: do not publish data
> structures before their setup is complete.
> 
> Fix this by making nvmem core code conform with this principle.
> 
> Fixes: eace75cfdcf7 ("nvmem: Add a simple NVMEM framework for nvmem providers")
> Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
> ---
> v2: add fixes tag
> 
>  drivers/nvmem/core.c | 18 ++++++++----------
>  1 file changed, 8 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
> index 321d7d63e068..6b89fb6fa582 100644
> --- a/drivers/nvmem/core.c
> +++ b/drivers/nvmem/core.c
> @@ -835,22 +835,16 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
>  	nvmem->dev.groups = nvmem_dev_groups;
>  #endif
>  
> -	dev_dbg(&nvmem->dev, "Registering nvmem device %s\n", config->name);
> -
> -	rval = device_register(&nvmem->dev);
> -	if (rval)
> -		goto err_put_device;
> -
>  	if (nvmem->nkeepout) {
>  		rval = nvmem_validate_keepouts(nvmem);
>  		if (rval)
> -			goto err_device_del;
> +			goto err_put_device;

You can't call put_device() on a device that hasn't gone through
device_initialize() yet.

>  	}
>  
>  	if (config->compat) {
>  		rval = nvmem_sysfs_setup_compat(nvmem, config);
>  		if (rval)
> -			goto err_device_del;
> +			goto err_put_device;

Ditto.

>  	}
>  
>  	if (config->cells) {
> @@ -867,6 +861,12 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
>  	if (rval)
>  		goto err_remove_cells;
>  
> +	dev_dbg(&nvmem->dev, "Registering nvmem device %s\n", config->name);
> +
> +	rval = device_register(&nvmem->dev);
> +	if (rval)
> +		goto err_remove_cells;
> +
>  	blocking_notifier_call_chain(&nvmem_notifier, NVMEM_ADD, nvmem);
>  
>  	return nvmem;
> @@ -876,8 +876,6 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
>  err_teardown_compat:
>  	if (config->compat)
>  		nvmem_sysfs_remove_compat(nvmem, config);
> -err_device_del:
> -	device_del(&nvmem->dev);
>  err_put_device:
>  	put_device(&nvmem->dev);
>  


- Hector
  
Russell King (Oracle) Jan. 3, 2023, 3:24 p.m. UTC | #2
On Wed, Jan 04, 2023 at 12:20:15AM +0900, Hector Martin wrote:
> On 03/01/2023 23.42, Russell King (Oracle) wrote:
> > The i.MX6 CPU frequency driver sometimes fails to register at boot time
> > due to nvmem_cell_read_u32() sporadically returning -ENOENT.
> > 
> > This happens because there is a window where __nvmem_device_get() in
> > of_nvmem_cell_get() is able to return the nvmem device, but as cells
> > have been setup, nvmem_find_cell_entry_by_node() returns NULL.
> > 
> > The occurs because the nvmem core registration code violates one of the
> > fundamental principles of kernel programming: do not publish data
> > structures before their setup is complete.
> > 
> > Fix this by making nvmem core code conform with this principle.
> > 
> > Fixes: eace75cfdcf7 ("nvmem: Add a simple NVMEM framework for nvmem providers")
> > Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
> > ---
> > v2: add fixes tag
> > 
> >  drivers/nvmem/core.c | 18 ++++++++----------
> >  1 file changed, 8 insertions(+), 10 deletions(-)
> > 
> > diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
> > index 321d7d63e068..6b89fb6fa582 100644
> > --- a/drivers/nvmem/core.c
> > +++ b/drivers/nvmem/core.c
> > @@ -835,22 +835,16 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
> >  	nvmem->dev.groups = nvmem_dev_groups;
> >  #endif
> >  
> > -	dev_dbg(&nvmem->dev, "Registering nvmem device %s\n", config->name);
> > -
> > -	rval = device_register(&nvmem->dev);
> > -	if (rval)
> > -		goto err_put_device;
> > -
> >  	if (nvmem->nkeepout) {
> >  		rval = nvmem_validate_keepouts(nvmem);
> >  		if (rval)
> > -			goto err_device_del;
> > +			goto err_put_device;
> 
> You can't call put_device() on a device that hasn't gone through
> device_initialize() yet.

Right, which is what I just realised while writing the previous reply.
We need to use device_initialize() and device_add(), so we can call
put_device() on it.

Error paths are difficult to properly test. :(
  

Patch

diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index 321d7d63e068..6b89fb6fa582 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -835,22 +835,16 @@  struct nvmem_device *nvmem_register(const struct nvmem_config *config)
 	nvmem->dev.groups = nvmem_dev_groups;
 #endif
 
-	dev_dbg(&nvmem->dev, "Registering nvmem device %s\n", config->name);
-
-	rval = device_register(&nvmem->dev);
-	if (rval)
-		goto err_put_device;
-
 	if (nvmem->nkeepout) {
 		rval = nvmem_validate_keepouts(nvmem);
 		if (rval)
-			goto err_device_del;
+			goto err_put_device;
 	}
 
 	if (config->compat) {
 		rval = nvmem_sysfs_setup_compat(nvmem, config);
 		if (rval)
-			goto err_device_del;
+			goto err_put_device;
 	}
 
 	if (config->cells) {
@@ -867,6 +861,12 @@  struct nvmem_device *nvmem_register(const struct nvmem_config *config)
 	if (rval)
 		goto err_remove_cells;
 
+	dev_dbg(&nvmem->dev, "Registering nvmem device %s\n", config->name);
+
+	rval = device_register(&nvmem->dev);
+	if (rval)
+		goto err_remove_cells;
+
 	blocking_notifier_call_chain(&nvmem_notifier, NVMEM_ADD, nvmem);
 
 	return nvmem;
@@ -876,8 +876,6 @@  struct nvmem_device *nvmem_register(const struct nvmem_config *config)
 err_teardown_compat:
 	if (config->compat)
 		nvmem_sysfs_remove_compat(nvmem, config);
-err_device_del:
-	device_del(&nvmem->dev);
 err_put_device:
 	put_device(&nvmem->dev);