[2/3] kernfs: Use a per-fs rwsem to protect per-fs list of kernfs_super_info.

Message ID 20230302043203.1695051-3-imran.f.khan@oracle.com
State New
Headers
Series kernfs: Introduce separate rwsem to protect inode |

Commit Message

Imran Khan March 2, 2023, 4:32 a.m. UTC
  Right now per-fs kernfs_rwsem protects list of kernfs_super_info instances
for a kernfs_root. Since kernfs_rwsem is used to synchronize several other
operations across kernfs and since most of these operations don't impact
kernfs_super_info, we can use a separate per-fs rwsem to synchronize access
to list of kernfs_super_info.
This helps in reducing contention around kernfs_rwsem and also allows
operations that change/access list of kernfs_super_info to proceed without
contending for kernfs_rwsem.

Signed-off-by: Imran Khan <imran.f.khan@oracle.com>
---
 fs/kernfs/dir.c             | 1 +
 fs/kernfs/file.c            | 2 ++
 fs/kernfs/kernfs-internal.h | 1 +
 fs/kernfs/mount.c           | 8 ++++----
 4 files changed, 8 insertions(+), 4 deletions(-)
  

Comments

Matthew Wilcox March 2, 2023, 4:08 p.m. UTC | #1
On Thu, Mar 02, 2023 at 03:32:02PM +1100, Imran Khan wrote:
> Right now per-fs kernfs_rwsem protects list of kernfs_super_info instances
> for a kernfs_root. Since kernfs_rwsem is used to synchronize several other
> operations across kernfs and since most of these operations don't impact
> kernfs_super_info, we can use a separate per-fs rwsem to synchronize access
> to list of kernfs_super_info.
> This helps in reducing contention around kernfs_rwsem and also allows
> operations that change/access list of kernfs_super_info to proceed without
> contending for kernfs_rwsem.
> 
> Signed-off-by: Imran Khan <imran.f.khan@oracle.com>

But you don't remove the acquisition of kernfs_rwsem in
kernfs_notify_workfn(), so I don't see how this helps?

Also, every use of this rwsem is as a writer, so it could/should be a
plain mutex, no?  Or should you be acquiring it for read in
kernfs_notify_workfn()?

>  fs/kernfs/dir.c             | 1 +
>  fs/kernfs/file.c            | 2 ++
>  fs/kernfs/kernfs-internal.h | 1 +
>  fs/kernfs/mount.c           | 8 ++++----
>  4 files changed, 8 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
> index 953b2717c60e6..2cdb8516e5287 100644
> --- a/fs/kernfs/dir.c
> +++ b/fs/kernfs/dir.c
> @@ -944,6 +944,7 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
>  	idr_init(&root->ino_idr);
>  	init_rwsem(&root->kernfs_rwsem);
>  	init_rwsem(&root->kernfs_iattr_rwsem);
> +	init_rwsem(&root->kernfs_supers_rwsem);
>  	INIT_LIST_HEAD(&root->supers);
>  
>  	/*
> diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
> index e4a50e4ff0d23..b84cf0cd4bd44 100644
> --- a/fs/kernfs/file.c
> +++ b/fs/kernfs/file.c
> @@ -924,6 +924,7 @@ static void kernfs_notify_workfn(struct work_struct *work)
>  	/* kick fsnotify */
>  	down_write(&root->kernfs_rwsem);
>  
> +	down_write(&root->kernfs_supers_rwsem);
>  	list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
>  		struct kernfs_node *parent;
>  		struct inode *p_inode = NULL;
> @@ -960,6 +961,7 @@ static void kernfs_notify_workfn(struct work_struct *work)
>  		iput(inode);
>  	}
>  
> +	up_write(&root->kernfs_supers_rwsem);
>  	up_write(&root->kernfs_rwsem);
>  	kernfs_put(kn);
>  	goto repeat;
> diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
> index 3297093c920de..a9b854cdfdb5f 100644
> --- a/fs/kernfs/kernfs-internal.h
> +++ b/fs/kernfs/kernfs-internal.h
> @@ -48,6 +48,7 @@ struct kernfs_root {
>  	wait_queue_head_t	deactivate_waitq;
>  	struct rw_semaphore	kernfs_rwsem;
>  	struct rw_semaphore	kernfs_iattr_rwsem;
> +	struct rw_semaphore	kernfs_supers_rwsem;
>  };
>  
>  /* +1 to avoid triggering overflow warning when negating it */
> diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
> index e08e8d9998070..d49606accb07b 100644
> --- a/fs/kernfs/mount.c
> +++ b/fs/kernfs/mount.c
> @@ -351,9 +351,9 @@ int kernfs_get_tree(struct fs_context *fc)
>  		}
>  		sb->s_flags |= SB_ACTIVE;
>  
> -		down_write(&root->kernfs_rwsem);
> +		down_write(&root->kernfs_supers_rwsem);
>  		list_add(&info->node, &info->root->supers);
> -		up_write(&root->kernfs_rwsem);
> +		up_write(&root->kernfs_supers_rwsem);
>  	}
>  
>  	fc->root = dget(sb->s_root);
> @@ -380,9 +380,9 @@ void kernfs_kill_sb(struct super_block *sb)
>  	struct kernfs_super_info *info = kernfs_info(sb);
>  	struct kernfs_root *root = info->root;
>  
> -	down_write(&root->kernfs_rwsem);
> +	down_write(&root->kernfs_supers_rwsem);
>  	list_del(&info->node);
> -	up_write(&root->kernfs_rwsem);
> +	up_write(&root->kernfs_supers_rwsem);
>  
>  	/*
>  	 * Remove the superblock from fs_supers/s_instances
> -- 
> 2.34.1
>
  
Imran Khan March 2, 2023, 9:28 p.m. UTC | #2
Hello Matthew,
Thanks for reviewing this.

On 3/3/2023 3:08 am, Matthew Wilcox wrote:
> On Thu, Mar 02, 2023 at 03:32:02PM +1100, Imran Khan wrote:
>> Right now per-fs kernfs_rwsem protects list of kernfs_super_info instances
>> for a kernfs_root. Since kernfs_rwsem is used to synchronize several other
>> operations across kernfs and since most of these operations don't impact
>> kernfs_super_info, we can use a separate per-fs rwsem to synchronize access
>> to list of kernfs_super_info.
>> This helps in reducing contention around kernfs_rwsem and also allows
>> operations that change/access list of kernfs_super_info to proceed without
>> contending for kernfs_rwsem.
>>
>> Signed-off-by: Imran Khan <imran.f.khan@oracle.com>
> 
> But you don't remove the acquisition of kernfs_rwsem in
> kernfs_notify_workfn(), so I don't see how this helps?
> 
Yes. kernfs_notify_workfn should no longer need kernfs_rwsem. I will fix it.
> Also, every use of this rwsem is as a writer, so it could/should be a
> plain mutex, no?  Or should you be acquiring it for read in
> kernfs_notify_workfn()?

Although kernfs_notify_workfn currently acquires kernfs_rwsem for writing, I
think that even without this change acquiring kernfs_rwsem for reading would be
enough, since we are not making any changes to the kernfs_super_info list.
Based on this logic, I think taking kernfs_supers_rwsem for reading is the right
approach.

Thanks,
Imran
  

Patch

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 953b2717c60e6..2cdb8516e5287 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -944,6 +944,7 @@  struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
 	idr_init(&root->ino_idr);
 	init_rwsem(&root->kernfs_rwsem);
 	init_rwsem(&root->kernfs_iattr_rwsem);
+	init_rwsem(&root->kernfs_supers_rwsem);
 	INIT_LIST_HEAD(&root->supers);
 
 	/*
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index e4a50e4ff0d23..b84cf0cd4bd44 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -924,6 +924,7 @@  static void kernfs_notify_workfn(struct work_struct *work)
 	/* kick fsnotify */
 	down_write(&root->kernfs_rwsem);
 
+	down_write(&root->kernfs_supers_rwsem);
 	list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
 		struct kernfs_node *parent;
 		struct inode *p_inode = NULL;
@@ -960,6 +961,7 @@  static void kernfs_notify_workfn(struct work_struct *work)
 		iput(inode);
 	}
 
+	up_write(&root->kernfs_supers_rwsem);
 	up_write(&root->kernfs_rwsem);
 	kernfs_put(kn);
 	goto repeat;
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 3297093c920de..a9b854cdfdb5f 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -48,6 +48,7 @@  struct kernfs_root {
 	wait_queue_head_t	deactivate_waitq;
 	struct rw_semaphore	kernfs_rwsem;
 	struct rw_semaphore	kernfs_iattr_rwsem;
+	struct rw_semaphore	kernfs_supers_rwsem;
 };
 
 /* +1 to avoid triggering overflow warning when negating it */
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index e08e8d9998070..d49606accb07b 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -351,9 +351,9 @@  int kernfs_get_tree(struct fs_context *fc)
 		}
 		sb->s_flags |= SB_ACTIVE;
 
-		down_write(&root->kernfs_rwsem);
+		down_write(&root->kernfs_supers_rwsem);
 		list_add(&info->node, &info->root->supers);
-		up_write(&root->kernfs_rwsem);
+		up_write(&root->kernfs_supers_rwsem);
 	}
 
 	fc->root = dget(sb->s_root);
@@ -380,9 +380,9 @@  void kernfs_kill_sb(struct super_block *sb)
 	struct kernfs_super_info *info = kernfs_info(sb);
 	struct kernfs_root *root = info->root;
 
-	down_write(&root->kernfs_rwsem);
+	down_write(&root->kernfs_supers_rwsem);
 	list_del(&info->node);
-	up_write(&root->kernfs_rwsem);
+	up_write(&root->kernfs_supers_rwsem);
 
 	/*
 	 * Remove the superblock from fs_supers/s_instances