mm: shrinkers: fix race condition on debugfs cleanup

Message ID 20230503013232.299211-1-joanbrugueram@gmail.com
State New
Headers
Series mm: shrinkers: fix race condition on debugfs cleanup |

Commit Message

Joan Bruguera Micó May 3, 2023, 1:32 a.m. UTC
  When something registers and unregisters many shrinkers, such as:
    for x in $(seq 10000); do unshare -Ui true; done

Sometimes the following error is printed to the kernel log:
    debugfs: Directory '...' with parent 'shrinker' already present!

This occurs since commit badc28d4924b ("mm: shrinkers: fix deadlock in
shrinker debugfs") / v6.2: Since the call to `debugfs_remove_recursive`
was moved outside the `shrinker_rwsem`/`shrinker_mutex` lock, but the call
 to `ida_free` stayed inside, a newly registered shrinker can be
 re-assigned that ID and attempt to create the debugfs directory before
the directory from the previous shrinker has been removed.

The locking changes in commit f95bdb700bc6 ("mm: vmscan: make global slab
shrink lockless") made the race condition more likely, though it existed
before then.

Commit badc28d4924b ("mm: shrinkers: fix deadlock in shrinker debugfs")
could be reverted since the issue is addressed should no longer occur
since the count and scan operations are lockless since commit 20cd1892fcc3
("mm: shrinkers: make count and scan in shrinker debugfs lockless").
However, since this is a contended lock, prefer instead moving `ida_free`
outside the lock to avoid the race.

Fixes: badc28d4924b ("mm: shrinkers: fix deadlock in shrinker debugfs")
Signed-off-by: Joan Bruguera Micó <joanbrugueram@gmail.com>
---
 include/linux/shrinker.h | 13 +++++++++++--
 mm/shrinker_debug.c      | 15 ++++++++++-----
 mm/vmscan.c              |  5 +++--
 3 files changed, 24 insertions(+), 9 deletions(-)
  

Comments

Qi Zheng May 3, 2023, 5:37 a.m. UTC | #1
On 2023/5/3 09:32, Joan Bruguera Micó wrote:
> When something registers and unregisters many shrinkers, such as:
>      for x in $(seq 10000); do unshare -Ui true; done
> 
> Sometimes the following error is printed to the kernel log:
>      debugfs: Directory '...' with parent 'shrinker' already present!
> 
> This occurs since commit badc28d4924b ("mm: shrinkers: fix deadlock in
> shrinker debugfs") / v6.2: Since the call to `debugfs_remove_recursive`
> was moved outside the `shrinker_rwsem`/`shrinker_mutex` lock, but the call
>   to `ida_free` stayed inside, a newly registered shrinker can be
>   re-assigned that ID and attempt to create the debugfs directory before
> the directory from the previous shrinker has been removed.
> 
> The locking changes in commit f95bdb700bc6 ("mm: vmscan: make global slab
> shrink lockless") made the race condition more likely, though it existed
> before then.
> 
> Commit badc28d4924b ("mm: shrinkers: fix deadlock in shrinker debugfs")
> could be reverted since the issue is addressed should no longer occur
> since the count and scan operations are lockless since commit 20cd1892fcc3
> ("mm: shrinkers: make count and scan in shrinker debugfs lockless").
> However, since this is a contended lock, prefer instead moving `ida_free`
> outside the lock to avoid the race.
> 
> Fixes: badc28d4924b ("mm: shrinkers: fix deadlock in shrinker debugfs")
> Signed-off-by: Joan Bruguera Micó <joanbrugueram@gmail.com>
> ---
>   include/linux/shrinker.h | 13 +++++++++++--
>   mm/shrinker_debug.c      | 15 ++++++++++-----
>   mm/vmscan.c              |  5 +++--
>   3 files changed, 24 insertions(+), 9 deletions(-)
> 
> diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
> index 7bde8e1c228a..224293b2dd06 100644
> --- a/include/linux/shrinker.h
> +++ b/include/linux/shrinker.h
> @@ -107,7 +107,10 @@ extern void synchronize_shrinkers(void);
>   
>   #ifdef CONFIG_SHRINKER_DEBUG
>   extern int shrinker_debugfs_add(struct shrinker *shrinker);
> -extern struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker);
> +extern struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker,
> +					      int *debugfs_id);
> +extern void shrinker_debugfs_remove(struct dentry *debugfs_entry,
> +				    int debugfs_id);
>   extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker,
>   						  const char *fmt, ...);
>   #else /* CONFIG_SHRINKER_DEBUG */
> @@ -115,10 +118,16 @@ static inline int shrinker_debugfs_add(struct shrinker *shrinker)
>   {
>   	return 0;
>   }
> -static inline struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker)
> +static inline struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker,
> +						     int *debugfs_id)
>   {
> +	*debugfs_id = -1;
>   	return NULL;
>   }
> +static inline void shrinker_debugfs_remove(struct dentry *debugfs_entry,
> +					   int debugfs_id)
> +{
> +}
>   static inline __printf(2, 3)
>   int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
>   {
> diff --git a/mm/shrinker_debug.c b/mm/shrinker_debug.c
> index 3f83b10c5031..fe10436d9911 100644
> --- a/mm/shrinker_debug.c
> +++ b/mm/shrinker_debug.c
> @@ -237,7 +237,8 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
>   }
>   EXPORT_SYMBOL(shrinker_debugfs_rename);
>   
> -struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker)
> +struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker,
> +				       int *debugfs_id)
>   {
>   	struct dentry *entry = shrinker->debugfs_entry;
>   
> @@ -246,14 +247,18 @@ struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker)
>   	kfree_const(shrinker->name);
>   	shrinker->name = NULL;
>   
> -	if (entry) {
> -		ida_free(&shrinker_debugfs_ida, shrinker->debugfs_id);
> -		shrinker->debugfs_entry = NULL;
> -	}
> +	*debugfs_id = entry ? shrinker->debugfs_id : -1;
> +	shrinker->debugfs_entry = NULL;
>   
>   	return entry;
>   }
>   
> +void shrinker_debugfs_remove(struct dentry *debugfs_entry, int debugfs_id)
> +{

It would be better to add a check:

	if (!debugfs_entry)
		return;

> +	debugfs_remove_recursive(debugfs_entry);
> +	ida_free(&shrinker_debugfs_ida, debugfs_id);
> +}
> +
>   static int __init shrinker_debugfs_init(void)
>   {
>   	struct shrinker *shrinker;
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 5bde07409303..c7d0faa343e0 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -805,6 +805,7 @@ EXPORT_SYMBOL(register_shrinker);
>   void unregister_shrinker(struct shrinker *shrinker)
>   {
>   	struct dentry *debugfs_entry;
> +	int debugfs_id;
>   
>   	if (!(shrinker->flags & SHRINKER_REGISTERED))
>   		return;
> @@ -814,13 +815,13 @@ void unregister_shrinker(struct shrinker *shrinker)
>   	shrinker->flags &= ~SHRINKER_REGISTERED;
>   	if (shrinker->flags & SHRINKER_MEMCG_AWARE)
>   		unregister_memcg_shrinker(shrinker);
> -	debugfs_entry = shrinker_debugfs_remove(shrinker);
> +	debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id);
>   	mutex_unlock(&shrinker_mutex);
>   
>   	atomic_inc(&shrinker_srcu_generation);
>   	synchronize_srcu(&shrinker_srcu);
>   
> -	debugfs_remove_recursive(debugfs_entry);
> +	shrinker_debugfs_remove(debugfs_entry, debugfs_id);
>   
>   	kfree(shrinker->nr_deferred);
>   	shrinker->nr_deferred = NULL;
  
Joan Bruguera Micó May 21, 2023, 1:57 p.m. UTC | #2
On 2023/5/3 13:37, Qi Zheng wrote:
> > +void shrinker_debugfs_remove(struct dentry *debugfs_entry, int debugfs_id)
> > +{
> 
> It would be better to add a check:
> 
> 	if (!debugfs_entry)
> 		return;
> 
> > +	debugfs_remove_recursive(debugfs_entry);
> > +	ida_free(&shrinker_debugfs_ida, debugfs_id);
> > +}

As a practical matter, both `debugfs_remove_recursive(NULL)` and
`ida_free(_, -1);` are documented as no-ops, see:

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/debugfs/inode.c?id=0dd2a6fb1e34d6dcb96806bc6b111388ad324722#n748
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=fc82bbf4dede758007763867d0282353c06d1121

Sorry for the late reply (the patch already reached the mainline tree).
  
Steven Price July 3, 2023, 11:14 a.m. UTC | #3
On 21/05/2023 14:57, Joan Bruguera Micó wrote:
> On 2023/5/3 13:37, Qi Zheng wrote:
>>> +void shrinker_debugfs_remove(struct dentry *debugfs_entry, int debugfs_id)
>>> +{
>>
>> It would be better to add a check:
>>
>> 	if (!debugfs_entry)
>> 		return;
>>
>>> +	debugfs_remove_recursive(debugfs_entry);
>>> +	ida_free(&shrinker_debugfs_ida, debugfs_id);
>>> +}
> 
> As a practical matter, both `debugfs_remove_recursive(NULL)` and
> `ida_free(_, -1);` are documented as no-ops, see:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/debugfs/inode.c?id=0dd2a6fb1e34d6dcb96806bc6b111388ad324722#n748
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=fc82bbf4dede758007763867d0282353c06d1121
> 
> Sorry for the late reply (the patch already reached the mainline tree).

Note that commit 69cb69ea5542 ("ida: Remove assertions that an ID was
allocated") currently in linux-next makes ida_free(..., -1) illegal.

I see a crash on boot on my test platform (Firefly-RK3288) with
linux-next because of this. Qi's suggested change fixes this.

I'm not sure whether Matthew's change removing the ((int)id < 0) check
was intentional or not. Reinstating that check would also fix the crash.

Steve
  
Joan Bruguera Micó July 26, 2023, 12:08 a.m. UTC | #4
Hmmm, indeed linux-next crashes with a null pointer dereference when
calling `ida_free(..., -1)`.

It appears to me that 69cb69ea5542 ("ida: Remove assertions that an ID was
allocated") didn't intend to make `ida_free(..., -1)` invalid; after all,
it was authored & introduced immediately after fc82bbf4dede ("ida: don't
use BUG_ON() for debugging") whose commit message calls for making it
legal, with Matthew's support.
And the referenced Bluetooth HCI code that also calls `ida_free(..., -1)`
is still there, as far as I can tell ([1]).

Rather, probably the `((int)id < 0)` was accidentally dropped, or the idea
was that it would be handled by the `not_found:` label in `ida_free`, but
for that to work you'd need to change the `!test_bit(bit, bitmap->bitmap)`
condition to `!bitmap || !test_bit(bit, bitmap->bitmap)` since otherwise
`bitmap->bitmap` is a null pointer dereference.

I will send a patch to get it fixed.

[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/net/bluetooth/hci_sock.c#n106
  
Matthew Wilcox July 29, 2023, 12:50 a.m. UTC | #5
On Wed, Jul 26, 2023 at 12:08:26AM +0000, Joan Bruguera Micó wrote:
> Hmmm, indeed linux-next crashes with a null pointer dereference when
> calling `ida_free(..., -1)`.
> 
> It appears to me that 69cb69ea5542 ("ida: Remove assertions that an ID was
> allocated") didn't intend to make `ida_free(..., -1)` invalid; after all,
> it was authored & introduced immediately after fc82bbf4dede ("ida: don't
> use BUG_ON() for debugging") whose commit message calls for making it
> legal, with Matthew's support.
> And the referenced Bluetooth HCI code that also calls `ida_free(..., -1)`
> is still there, as far as I can tell ([1]).
> 
> Rather, probably the `((int)id < 0)` was accidentally dropped, or the idea
> was that it would be handled by the `not_found:` label in `ida_free`, but
> for that to work you'd need to change the `!test_bit(bit, bitmap->bitmap)`
> condition to `!bitmap || !test_bit(bit, bitmap->bitmap)` since otherwise
> `bitmap->bitmap` is a null pointer dereference.

I've been in two minds about that patch ever since I wrote it.  I've
dropped it from my tree for now.

But, er, you have pointed out a bug which is that we don't handle
!bitmap properly.  That should be fixed.
  

Patch

diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 7bde8e1c228a..224293b2dd06 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -107,7 +107,10 @@  extern void synchronize_shrinkers(void);
 
 #ifdef CONFIG_SHRINKER_DEBUG
 extern int shrinker_debugfs_add(struct shrinker *shrinker);
-extern struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker);
+extern struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker,
+					      int *debugfs_id);
+extern void shrinker_debugfs_remove(struct dentry *debugfs_entry,
+				    int debugfs_id);
 extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker,
 						  const char *fmt, ...);
 #else /* CONFIG_SHRINKER_DEBUG */
@@ -115,10 +118,16 @@  static inline int shrinker_debugfs_add(struct shrinker *shrinker)
 {
 	return 0;
 }
-static inline struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker)
+static inline struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker,
+						     int *debugfs_id)
 {
+	*debugfs_id = -1;
 	return NULL;
 }
+static inline void shrinker_debugfs_remove(struct dentry *debugfs_entry,
+					   int debugfs_id)
+{
+}
 static inline __printf(2, 3)
 int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
 {
diff --git a/mm/shrinker_debug.c b/mm/shrinker_debug.c
index 3f83b10c5031..fe10436d9911 100644
--- a/mm/shrinker_debug.c
+++ b/mm/shrinker_debug.c
@@ -237,7 +237,8 @@  int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
 }
 EXPORT_SYMBOL(shrinker_debugfs_rename);
 
-struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker)
+struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker,
+				       int *debugfs_id)
 {
 	struct dentry *entry = shrinker->debugfs_entry;
 
@@ -246,14 +247,18 @@  struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker)
 	kfree_const(shrinker->name);
 	shrinker->name = NULL;
 
-	if (entry) {
-		ida_free(&shrinker_debugfs_ida, shrinker->debugfs_id);
-		shrinker->debugfs_entry = NULL;
-	}
+	*debugfs_id = entry ? shrinker->debugfs_id : -1;
+	shrinker->debugfs_entry = NULL;
 
 	return entry;
 }
 
+void shrinker_debugfs_remove(struct dentry *debugfs_entry, int debugfs_id)
+{
+	debugfs_remove_recursive(debugfs_entry);
+	ida_free(&shrinker_debugfs_ida, debugfs_id);
+}
+
 static int __init shrinker_debugfs_init(void)
 {
 	struct shrinker *shrinker;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5bde07409303..c7d0faa343e0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -805,6 +805,7 @@  EXPORT_SYMBOL(register_shrinker);
 void unregister_shrinker(struct shrinker *shrinker)
 {
 	struct dentry *debugfs_entry;
+	int debugfs_id;
 
 	if (!(shrinker->flags & SHRINKER_REGISTERED))
 		return;
@@ -814,13 +815,13 @@  void unregister_shrinker(struct shrinker *shrinker)
 	shrinker->flags &= ~SHRINKER_REGISTERED;
 	if (shrinker->flags & SHRINKER_MEMCG_AWARE)
 		unregister_memcg_shrinker(shrinker);
-	debugfs_entry = shrinker_debugfs_remove(shrinker);
+	debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id);
 	mutex_unlock(&shrinker_mutex);
 
 	atomic_inc(&shrinker_srcu_generation);
 	synchronize_srcu(&shrinker_srcu);
 
-	debugfs_remove_recursive(debugfs_entry);
+	shrinker_debugfs_remove(debugfs_entry, debugfs_id);
 
 	kfree(shrinker->nr_deferred);
 	shrinker->nr_deferred = NULL;