[1/2,v2] ceph: add ceph_lock_info support for file_lock

Message ID 20221114051901.15371-2-xiubli@redhat.com
State New
Headers
Series ceph: fix the use-after-free bug for file_lock |

Commit Message

Xiubo Li Nov. 14, 2022, 5:19 a.m. UTC
  From: Xiubo Li <xiubli@redhat.com>

When ceph releases a file_lock it tries to get the inode pointer
from fl->fl_file, but that memory could already have been freed by
another thread in filp_close(), because the VFS layer does not take
a reference on the file for fl->fl_file.

Switch to a ceph-dedicated lock info to track the inode instead.

In ceph_fl_release_lock() we should skip all the operations if
fl->fl_u.ceph_fl.fl_inode is not set, which means the file_lock is a
request file_lock. fl->fl_u.ceph_fl.fl_inode is set when the lock is
inserted into the inode lock list, which is when the lock is copied.

Cc: stable@vger.kernel.org
URL: https://tracker.ceph.com/issues/57986
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/locks.c                 | 18 +++++++++++++++---
 include/linux/ceph/ceph_fs_fl.h | 26 ++++++++++++++++++++++++++
 include/linux/fs.h              |  2 ++
 3 files changed, 43 insertions(+), 3 deletions(-)
 create mode 100644 include/linux/ceph/ceph_fs_fl.h
  

Comments

Jeff Layton Nov. 14, 2022, 11:24 a.m. UTC | #1
On Mon, 2022-11-14 at 13:19 +0800, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> When ceph releasing the file_lock it will try to get the inode pointer
> from the fl->fl_file, which the memory could already be released by
> another thread in filp_close(). Because in VFS layer the fl->fl_file
> doesn't increase the file's reference counter.
> 
> Will switch to use ceph dedicate lock info to track the inode.
> 
> And in ceph_fl_release_lock() we should skip all the operations if
> the fl->fl_u.ceph_fl.fl_inode is not set, which should come from
> the request file_lock. And we will set fl->fl_u.ceph_fl.fl_inode when
> inserting it to the inode lock list, which is when copying the lock.
> 
> Cc: stable@vger.kernel.org
> URL: https://tracker.ceph.com/issues/57986
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/locks.c                 | 18 +++++++++++++++---
>  include/linux/ceph/ceph_fs_fl.h | 26 ++++++++++++++++++++++++++
>  include/linux/fs.h              |  2 ++
>  3 files changed, 43 insertions(+), 3 deletions(-)
>  create mode 100644 include/linux/ceph/ceph_fs_fl.h
> 
> diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
> index 3e2843e86e27..d8385dd0076e 100644
> --- a/fs/ceph/locks.c
> +++ b/fs/ceph/locks.c
> @@ -34,22 +34,34 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
>  {
>  	struct ceph_file_info *fi = dst->fl_file->private_data;
>  	struct inode *inode = file_inode(dst->fl_file);
> +
>  	atomic_inc(&ceph_inode(inode)->i_filelock_ref);
>  	atomic_inc(&fi->num_locks);
> +	dst->fl_u.ceph_fl.fl_inode = igrab(inode);
>  }
>  
>  static void ceph_fl_release_lock(struct file_lock *fl)
>  {
>  	struct ceph_file_info *fi = fl->fl_file->private_data;
> -	struct inode *inode = file_inode(fl->fl_file);
> -	struct ceph_inode_info *ci = ceph_inode(inode);
> -	atomic_dec(&fi->num_locks);
> +	struct inode *inode = fl->fl_u.ceph_fl.fl_inode;
> +	struct ceph_inode_info *ci;
> +
> +	/*
> +	 * If inode is NULL it should be a request file_lock,
> +	 * nothing we can do.
> +	 */
> +	if (!inode)
> +		return;
> +
> +	ci = ceph_inode(inode);
>  	if (atomic_dec_and_test(&ci->i_filelock_ref)) {
>  		/* clear error when all locks are released */
>  		spin_lock(&ci->i_ceph_lock);
>  		ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
>  		spin_unlock(&ci->i_ceph_lock);
>  	}
> +	iput(inode);
> +	atomic_dec(&fi->num_locks);

It doesn't look like this fixes the original issue. "fi" may be pointing
to freed memory at this point, right? I think you may need to get rid of
the "num_locks" field in ceph_file_info, and do that in a different way?

>  }
>  
>  static const struct file_lock_operations ceph_fl_lock_ops = {
> diff --git a/include/linux/ceph/ceph_fs_fl.h b/include/linux/ceph/ceph_fs_fl.h
> new file mode 100644
> index 000000000000..02a412b26095
> --- /dev/null
> +++ b/include/linux/ceph/ceph_fs_fl.h
> @@ -0,0 +1,26 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * ceph_fs.h - Ceph constants and data types to share between kernel and
> + * user space.
> + *
> + * Most types in this file are defined as little-endian, and are
> + * primarily intended to describe data structures that pass over the
> + * wire or that are stored on disk.
> + *
> + * LGPL2
> + */
> +
> +#ifndef CEPH_FS_FL_H
> +#define CEPH_FS_FL_H
> +
> +#include <linux/fs.h>
> +
> +/*
> + * Ceph lock info
> + */
> +
> +struct ceph_lock_info {
> +	struct inode *fl_inode;
> +};
> +
> +#endif
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index e654435f1651..db4810d19e26 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1066,6 +1066,7 @@ bool opens_in_grace(struct net *);
>  
>  /* that will die - we need it for nfs_lock_info */
>  #include <linux/nfs_fs_i.h>
> +#include <linux/ceph/ceph_fs_fl.h>
>  
>  /*
>   * struct file_lock represents a generic "file lock". It's used to represent
> @@ -1119,6 +1120,7 @@ struct file_lock {
>  			int state;		/* state of grant or error if -ve */
>  			unsigned int	debug_id;
>  		} afs;
> +		struct ceph_lock_info	ceph_fl;
>  	} fl_u;
>  } __randomize_layout;
>
  
Xiubo Li Nov. 14, 2022, 1 p.m. UTC | #2
On 14/11/2022 19:24, Jeff Layton wrote:
> On Mon, 2022-11-14 at 13:19 +0800, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> When ceph releasing the file_lock it will try to get the inode pointer
>> from the fl->fl_file, which the memory could already be released by
>> another thread in filp_close(). Because in VFS layer the fl->fl_file
>> doesn't increase the file's reference counter.
>>
>> Will switch to use ceph dedicate lock info to track the inode.
>>
>> And in ceph_fl_release_lock() we should skip all the operations if
>> the fl->fl_u.ceph_fl.fl_inode is not set, which should come from
>> the request file_lock. And we will set fl->fl_u.ceph_fl.fl_inode when
>> inserting it to the inode lock list, which is when copying the lock.
>>
>> Cc: stable@vger.kernel.org
>> URL: https://tracker.ceph.com/issues/57986
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>   fs/ceph/locks.c                 | 18 +++++++++++++++---
>>   include/linux/ceph/ceph_fs_fl.h | 26 ++++++++++++++++++++++++++
>>   include/linux/fs.h              |  2 ++
>>   3 files changed, 43 insertions(+), 3 deletions(-)
>>   create mode 100644 include/linux/ceph/ceph_fs_fl.h
>>
>> diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
>> index 3e2843e86e27..d8385dd0076e 100644
>> --- a/fs/ceph/locks.c
>> +++ b/fs/ceph/locks.c
>> @@ -34,22 +34,34 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
>>   {
>>   	struct ceph_file_info *fi = dst->fl_file->private_data;
>>   	struct inode *inode = file_inode(dst->fl_file);
>> +
>>   	atomic_inc(&ceph_inode(inode)->i_filelock_ref);
>>   	atomic_inc(&fi->num_locks);
>> +	dst->fl_u.ceph_fl.fl_inode = igrab(inode);
>>   }
>>   
>>   static void ceph_fl_release_lock(struct file_lock *fl)
>>   {
>>   	struct ceph_file_info *fi = fl->fl_file->private_data;
>> -	struct inode *inode = file_inode(fl->fl_file);
>> -	struct ceph_inode_info *ci = ceph_inode(inode);
>> -	atomic_dec(&fi->num_locks);
>> +	struct inode *inode = fl->fl_u.ceph_fl.fl_inode;
>> +	struct ceph_inode_info *ci;
>> +
>> +	/*
>> +	 * If inode is NULL it should be a request file_lock,
>> +	 * nothing we can do.
>> +	 */
>> +	if (!inode)
>> +		return;
>> +
>> +	ci = ceph_inode(inode);
>>   	if (atomic_dec_and_test(&ci->i_filelock_ref)) {
>>   		/* clear error when all locks are released */
>>   		spin_lock(&ci->i_ceph_lock);
>>   		ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
>>   		spin_unlock(&ci->i_ceph_lock);
>>   	}
>> +	iput(inode);
>> +	atomic_dec(&fi->num_locks);
> It doesn't look like this fixes the original issue. "fi" may be pointing
> to freed memory at this point, right?

Yeah, I didn't fix this in this patch. I fixed it in the dedicated 2/2 
patch.

>   I think you may need to get rid of
> the "num_locks" field in ceph_file_info, and do that in a different way?
>
This is a dedicated field for each 'file' struct. I have no idea how to 
fix this in a different way yet.

Thanks!

- Xiubo


>>   }
>>   
>>   static const struct file_lock_operations ceph_fl_lock_ops = {
>> diff --git a/include/linux/ceph/ceph_fs_fl.h b/include/linux/ceph/ceph_fs_fl.h
>> new file mode 100644
>> index 000000000000..02a412b26095
>> --- /dev/null
>> +++ b/include/linux/ceph/ceph_fs_fl.h
>> @@ -0,0 +1,26 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * ceph_fs.h - Ceph constants and data types to share between kernel and
>> + * user space.
>> + *
>> + * Most types in this file are defined as little-endian, and are
>> + * primarily intended to describe data structures that pass over the
>> + * wire or that are stored on disk.
>> + *
>> + * LGPL2
>> + */
>> +
>> +#ifndef CEPH_FS_FL_H
>> +#define CEPH_FS_FL_H
>> +
>> +#include <linux/fs.h>
>> +
>> +/*
>> + * Ceph lock info
>> + */
>> +
>> +struct ceph_lock_info {
>> +	struct inode *fl_inode;
>> +};
>> +
>> +#endif
>> diff --git a/include/linux/fs.h b/include/linux/fs.h
>> index e654435f1651..db4810d19e26 100644
>> --- a/include/linux/fs.h
>> +++ b/include/linux/fs.h
>> @@ -1066,6 +1066,7 @@ bool opens_in_grace(struct net *);
>>   
>>   /* that will die - we need it for nfs_lock_info */
>>   #include <linux/nfs_fs_i.h>
>> +#include <linux/ceph/ceph_fs_fl.h>
>>   
>>   /*
>>    * struct file_lock represents a generic "file lock". It's used to represent
>> @@ -1119,6 +1120,7 @@ struct file_lock {
>>   			int state;		/* state of grant or error if -ve */
>>   			unsigned int	debug_id;
>>   		} afs;
>> +		struct ceph_lock_info	ceph_fl;
>>   	} fl_u;
>>   } __randomize_layout;
>>
  

Patch

diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 3e2843e86e27..d8385dd0076e 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -34,22 +34,34 @@  static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
 {
 	struct ceph_file_info *fi = dst->fl_file->private_data;
 	struct inode *inode = file_inode(dst->fl_file);
+
 	atomic_inc(&ceph_inode(inode)->i_filelock_ref);
 	atomic_inc(&fi->num_locks);
+	dst->fl_u.ceph_fl.fl_inode = igrab(inode);
 }
 
 static void ceph_fl_release_lock(struct file_lock *fl)
 {
 	struct ceph_file_info *fi = fl->fl_file->private_data;
-	struct inode *inode = file_inode(fl->fl_file);
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	atomic_dec(&fi->num_locks);
+	struct inode *inode = fl->fl_u.ceph_fl.fl_inode;
+	struct ceph_inode_info *ci;
+
+	/*
+	 * If inode is NULL it should be a request file_lock,
+	 * nothing we can do.
+	 */
+	if (!inode)
+		return;
+
+	ci = ceph_inode(inode);
 	if (atomic_dec_and_test(&ci->i_filelock_ref)) {
 		/* clear error when all locks are released */
 		spin_lock(&ci->i_ceph_lock);
 		ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
 		spin_unlock(&ci->i_ceph_lock);
 	}
+	iput(inode);
+	atomic_dec(&fi->num_locks);
 }
 
 static const struct file_lock_operations ceph_fl_lock_ops = {
diff --git a/include/linux/ceph/ceph_fs_fl.h b/include/linux/ceph/ceph_fs_fl.h
new file mode 100644
index 000000000000..02a412b26095
--- /dev/null
+++ b/include/linux/ceph/ceph_fs_fl.h
@@ -0,0 +1,26 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ceph_fs.h - Ceph constants and data types to share between kernel and
+ * user space.
+ *
+ * Most types in this file are defined as little-endian, and are
+ * primarily intended to describe data structures that pass over the
+ * wire or that are stored on disk.
+ *
+ * LGPL2
+ */
+
+#ifndef CEPH_FS_FL_H
+#define CEPH_FS_FL_H
+
+#include <linux/fs.h>
+
+/*
+ * Ceph lock info
+ */
+
+struct ceph_lock_info {
+	struct inode *fl_inode;
+};
+
+#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e654435f1651..db4810d19e26 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1066,6 +1066,7 @@  bool opens_in_grace(struct net *);
 
 /* that will die - we need it for nfs_lock_info */
 #include <linux/nfs_fs_i.h>
+#include <linux/ceph/ceph_fs_fl.h>
 
 /*
  * struct file_lock represents a generic "file lock". It's used to represent
@@ -1119,6 +1120,7 @@  struct file_lock {
 			int state;		/* state of grant or error if -ve */
 			unsigned int	debug_id;
 		} afs;
+		struct ceph_lock_info	ceph_fl;
 	} fl_u;
 } __randomize_layout;