mm: shrinkers: fix race condition on debugfs cleanup
Commit Message
When something registers and unregisters many shrinkers, such as:
for x in $(seq 10000); do unshare -Ui true; done
Sometimes the following error is printed to the kernel log:
debugfs: Directory '...' with parent 'shrinker' already present!
This occurs since commit badc28d4924b ("mm: shrinkers: fix deadlock in
shrinker debugfs") / v6.2: Since the call to `debugfs_remove_recursive`
was moved outside the `shrinker_rwsem`/`shrinker_mutex` lock, but the call
to `ida_free` stayed inside, a newly registered shrinker can be
re-assigned that ID and attempt to create the debugfs directory before
the directory from the previous shrinker has been removed.
The locking changes in commit f95bdb700bc6 ("mm: vmscan: make global slab
shrink lockless") made the race condition more likely, though it existed
before then.
Commit badc28d4924b ("mm: shrinkers: fix deadlock in shrinker debugfs")
could be reverted since the issue is addressed should no longer occur
since the count and scan operations are lockless since commit 20cd1892fcc3
("mm: shrinkers: make count and scan in shrinker debugfs lockless").
However, since this is a contended lock, prefer instead moving `ida_free`
outside the lock to avoid the race.
Fixes: badc28d4924b ("mm: shrinkers: fix deadlock in shrinker debugfs")
Signed-off-by: Joan Bruguera Micó <joanbrugueram@gmail.com>
---
include/linux/shrinker.h | 13 +++++++++++--
mm/shrinker_debug.c | 15 ++++++++++-----
mm/vmscan.c | 5 +++--
3 files changed, 24 insertions(+), 9 deletions(-)
Comments
On 2023/5/3 09:32, Joan Bruguera Micó wrote:
> When something registers and unregisters many shrinkers, such as:
> for x in $(seq 10000); do unshare -Ui true; done
>
> Sometimes the following error is printed to the kernel log:
> debugfs: Directory '...' with parent 'shrinker' already present!
>
> This occurs since commit badc28d4924b ("mm: shrinkers: fix deadlock in
> shrinker debugfs") / v6.2: Since the call to `debugfs_remove_recursive`
> was moved outside the `shrinker_rwsem`/`shrinker_mutex` lock, but the call
> to `ida_free` stayed inside, a newly registered shrinker can be
> re-assigned that ID and attempt to create the debugfs directory before
> the directory from the previous shrinker has been removed.
>
> The locking changes in commit f95bdb700bc6 ("mm: vmscan: make global slab
> shrink lockless") made the race condition more likely, though it existed
> before then.
>
> Commit badc28d4924b ("mm: shrinkers: fix deadlock in shrinker debugfs")
> could be reverted since the issue is addressed should no longer occur
> since the count and scan operations are lockless since commit 20cd1892fcc3
> ("mm: shrinkers: make count and scan in shrinker debugfs lockless").
> However, since this is a contended lock, prefer instead moving `ida_free`
> outside the lock to avoid the race.
>
> Fixes: badc28d4924b ("mm: shrinkers: fix deadlock in shrinker debugfs")
> Signed-off-by: Joan Bruguera Micó <joanbrugueram@gmail.com>
> ---
> include/linux/shrinker.h | 13 +++++++++++--
> mm/shrinker_debug.c | 15 ++++++++++-----
> mm/vmscan.c | 5 +++--
> 3 files changed, 24 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
> index 7bde8e1c228a..224293b2dd06 100644
> --- a/include/linux/shrinker.h
> +++ b/include/linux/shrinker.h
> @@ -107,7 +107,10 @@ extern void synchronize_shrinkers(void);
>
> #ifdef CONFIG_SHRINKER_DEBUG
> extern int shrinker_debugfs_add(struct shrinker *shrinker);
> -extern struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker);
> +extern struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker,
> + int *debugfs_id);
> +extern void shrinker_debugfs_remove(struct dentry *debugfs_entry,
> + int debugfs_id);
> extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker,
> const char *fmt, ...);
> #else /* CONFIG_SHRINKER_DEBUG */
> @@ -115,10 +118,16 @@ static inline int shrinker_debugfs_add(struct shrinker *shrinker)
> {
> return 0;
> }
> -static inline struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker)
> +static inline struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker,
> + int *debugfs_id)
> {
> + *debugfs_id = -1;
> return NULL;
> }
> +static inline void shrinker_debugfs_remove(struct dentry *debugfs_entry,
> + int debugfs_id)
> +{
> +}
> static inline __printf(2, 3)
> int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
> {
> diff --git a/mm/shrinker_debug.c b/mm/shrinker_debug.c
> index 3f83b10c5031..fe10436d9911 100644
> --- a/mm/shrinker_debug.c
> +++ b/mm/shrinker_debug.c
> @@ -237,7 +237,8 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
> }
> EXPORT_SYMBOL(shrinker_debugfs_rename);
>
> -struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker)
> +struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker,
> + int *debugfs_id)
> {
> struct dentry *entry = shrinker->debugfs_entry;
>
> @@ -246,14 +247,18 @@ struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker)
> kfree_const(shrinker->name);
> shrinker->name = NULL;
>
> - if (entry) {
> - ida_free(&shrinker_debugfs_ida, shrinker->debugfs_id);
> - shrinker->debugfs_entry = NULL;
> - }
> + *debugfs_id = entry ? shrinker->debugfs_id : -1;
> + shrinker->debugfs_entry = NULL;
>
> return entry;
> }
>
> +void shrinker_debugfs_remove(struct dentry *debugfs_entry, int debugfs_id)
> +{
It would be better to add a check:
if (!debugfs_entry)
return;
> + debugfs_remove_recursive(debugfs_entry);
> + ida_free(&shrinker_debugfs_ida, debugfs_id);
> +}
> +
> static int __init shrinker_debugfs_init(void)
> {
> struct shrinker *shrinker;
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 5bde07409303..c7d0faa343e0 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -805,6 +805,7 @@ EXPORT_SYMBOL(register_shrinker);
> void unregister_shrinker(struct shrinker *shrinker)
> {
> struct dentry *debugfs_entry;
> + int debugfs_id;
>
> if (!(shrinker->flags & SHRINKER_REGISTERED))
> return;
> @@ -814,13 +815,13 @@ void unregister_shrinker(struct shrinker *shrinker)
> shrinker->flags &= ~SHRINKER_REGISTERED;
> if (shrinker->flags & SHRINKER_MEMCG_AWARE)
> unregister_memcg_shrinker(shrinker);
> - debugfs_entry = shrinker_debugfs_remove(shrinker);
> + debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id);
> mutex_unlock(&shrinker_mutex);
>
> atomic_inc(&shrinker_srcu_generation);
> synchronize_srcu(&shrinker_srcu);
>
> - debugfs_remove_recursive(debugfs_entry);
> + shrinker_debugfs_remove(debugfs_entry, debugfs_id);
>
> kfree(shrinker->nr_deferred);
> shrinker->nr_deferred = NULL;
On 2023/5/3 13:37, Qi Zheng wrote:
> > +void shrinker_debugfs_remove(struct dentry *debugfs_entry, int debugfs_id)
> > +{
>
> It would be better to add a check:
>
> if (!debugfs_entry)
> return;
>
> > + debugfs_remove_recursive(debugfs_entry);
> > + ida_free(&shrinker_debugfs_ida, debugfs_id);
> > +}
As a practical matter, both `debugfs_remove_recursive(NULL)` and
`ida_free(_, -1);` are documented as no-ops, see:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/debugfs/inode.c?id=0dd2a6fb1e34d6dcb96806bc6b111388ad324722#n748
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=fc82bbf4dede758007763867d0282353c06d1121
Sorry for the late reply (the patch already reached the mainline tree).
On 21/05/2023 14:57, Joan Bruguera Micó wrote:
> On 2023/5/3 13:37, Qi Zheng wrote:
>>> +void shrinker_debugfs_remove(struct dentry *debugfs_entry, int debugfs_id)
>>> +{
>>
>> It would be better to add a check:
>>
>> if (!debugfs_entry)
>> return;
>>
>>> + debugfs_remove_recursive(debugfs_entry);
>>> + ida_free(&shrinker_debugfs_ida, debugfs_id);
>>> +}
>
> As a practical matter, both `debugfs_remove_recursive(NULL)` and
> `ida_free(_, -1);` are documented as no-ops, see:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/debugfs/inode.c?id=0dd2a6fb1e34d6dcb96806bc6b111388ad324722#n748
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=fc82bbf4dede758007763867d0282353c06d1121
>
> Sorry for the late reply (the patch already reached the mainline tree).
Note that commit 69cb69ea5542 ("ida: Remove assertions that an ID was
allocated") currently in linux-next makes ida_free(..., -1) illegal.
I see a crash on boot on my test platform (Firefly-RK3288) with
linux-next because of this. Qi's suggested change fixes this.
I'm not sure whether Matthew's change removing the ((int)id < 0) check
was intentional or not. Reinstating that check would also fix the crash.
Steve
Hmmm, indeed linux-next crashes with a null pointer dereference when
calling `ida_free(..., -1)`.
It appears to me that 69cb69ea5542 ("ida: Remove assertions that an ID was
allocated") didn't intend to make `ida_free(..., -1)` invalid; after all,
it was authored & introduced immediately after fc82bbf4dede ("ida: don't
use BUG_ON() for debugging") whose commit message calls for making it
legal, with Matthew's support.
And the referenced Bluetooth HCI code that also calls `ida_free(..., -1)`
is still there, as far as I can tell ([1]).
Rather, probably the `((int)id < 0)` was accidentally dropped, or the idea
was that it would be handled by the `not_found:` label in `ida_free`, but
for that to work you'd need to change the `!test_bit(bit, bitmap->bitmap)`
condition to `!bitmap || !test_bit(bit, bitmap->bitmap)` since otherwise
`bitmap->bitmap` is a null pointer dereference.
I will send a patch to get it fixed.
[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/net/bluetooth/hci_sock.c#n106
On Wed, Jul 26, 2023 at 12:08:26AM +0000, Joan Bruguera Micó wrote:
> Hmmm, indeed linux-next crashes with a null pointer dereference when
> calling `ida_free(..., -1)`.
>
> It appears to me that 69cb69ea5542 ("ida: Remove assertions that an ID was
> allocated") didn't intend to make `ida_free(..., -1)` invalid; after all,
> it was authored & introduced immediately after fc82bbf4dede ("ida: don't
> use BUG_ON() for debugging") whose commit message calls for making it
> legal, with Matthew's support.
> And the referenced Bluetooth HCI code that also calls `ida_free(..., -1)`
> is still there, as far as I can tell ([1]).
>
> Rather, probably the `((int)id < 0)` was accidentally dropped, or the idea
> was that it would be handled by the `not_found:` label in `ida_free`, but
> for that to work you'd need to change the `!test_bit(bit, bitmap->bitmap)`
> condition to `!bitmap || !test_bit(bit, bitmap->bitmap)` since otherwise
> `bitmap->bitmap` is a null pointer dereference.
I've been in two minds about that patch ever since I wrote it. I've
dropped it from my tree for now.
But, er, you have pointed out a bug which is that we don't handle
!bitmap properly. That should be fixed.
@@ -107,7 +107,10 @@ extern void synchronize_shrinkers(void);
#ifdef CONFIG_SHRINKER_DEBUG
extern int shrinker_debugfs_add(struct shrinker *shrinker);
-extern struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker);
+extern struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker,
+ int *debugfs_id);
+extern void shrinker_debugfs_remove(struct dentry *debugfs_entry,
+ int debugfs_id);
extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker,
const char *fmt, ...);
#else /* CONFIG_SHRINKER_DEBUG */
@@ -115,10 +118,16 @@ static inline int shrinker_debugfs_add(struct shrinker *shrinker)
{
return 0;
}
-static inline struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker)
+static inline struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker,
+ int *debugfs_id)
{
+ *debugfs_id = -1;
return NULL;
}
+static inline void shrinker_debugfs_remove(struct dentry *debugfs_entry,
+ int debugfs_id)
+{
+}
static inline __printf(2, 3)
int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
{
@@ -237,7 +237,8 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
}
EXPORT_SYMBOL(shrinker_debugfs_rename);
-struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker)
+struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker,
+ int *debugfs_id)
{
struct dentry *entry = shrinker->debugfs_entry;
@@ -246,14 +247,18 @@ struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker)
kfree_const(shrinker->name);
shrinker->name = NULL;
- if (entry) {
- ida_free(&shrinker_debugfs_ida, shrinker->debugfs_id);
- shrinker->debugfs_entry = NULL;
- }
+ *debugfs_id = entry ? shrinker->debugfs_id : -1;
+ shrinker->debugfs_entry = NULL;
return entry;
}
+void shrinker_debugfs_remove(struct dentry *debugfs_entry, int debugfs_id)
+{
+ debugfs_remove_recursive(debugfs_entry);
+ ida_free(&shrinker_debugfs_ida, debugfs_id);
+}
+
static int __init shrinker_debugfs_init(void)
{
struct shrinker *shrinker;
@@ -805,6 +805,7 @@ EXPORT_SYMBOL(register_shrinker);
void unregister_shrinker(struct shrinker *shrinker)
{
struct dentry *debugfs_entry;
+ int debugfs_id;
if (!(shrinker->flags & SHRINKER_REGISTERED))
return;
@@ -814,13 +815,13 @@ void unregister_shrinker(struct shrinker *shrinker)
shrinker->flags &= ~SHRINKER_REGISTERED;
if (shrinker->flags & SHRINKER_MEMCG_AWARE)
unregister_memcg_shrinker(shrinker);
- debugfs_entry = shrinker_debugfs_remove(shrinker);
+ debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id);
mutex_unlock(&shrinker_mutex);
atomic_inc(&shrinker_srcu_generation);
synchronize_srcu(&shrinker_srcu);
- debugfs_remove_recursive(debugfs_entry);
+ shrinker_debugfs_remove(debugfs_entry, debugfs_id);
kfree(shrinker->nr_deferred);
shrinker->nr_deferred = NULL;