[1/2,v2] ceph: add ceph_lock_info support for file_lock
Commit Message
From: Xiubo Li <xiubli@redhat.com>
When ceph releasing the file_lock it will try to get the inode pointer
from the fl->fl_file, which the memory could already be released by
another thread in filp_close(). Because in VFS layer the fl->fl_file
doesn't increase the file's reference counter.
Will switch to use ceph dedicate lock info to track the inode.
And in ceph_fl_release_lock() we should skip all the operations if
the fl->fl_u.ceph_fl.fl_inode is not set, which should come from
the request file_lock. And we will set fl->fl_u.ceph_fl.fl_inode when
inserting it to the inode lock list, which is when copying the lock.
Cc: stable@vger.kernel.org
URL: https://tracker.ceph.com/issues/57986
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
fs/ceph/locks.c | 18 +++++++++++++++---
include/linux/ceph/ceph_fs_fl.h | 26 ++++++++++++++++++++++++++
include/linux/fs.h | 2 ++
3 files changed, 43 insertions(+), 3 deletions(-)
create mode 100644 include/linux/ceph/ceph_fs_fl.h
Comments
On Mon, 2022-11-14 at 13:19 +0800, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
>
> When ceph releasing the file_lock it will try to get the inode pointer
> from the fl->fl_file, which the memory could already be released by
> another thread in filp_close(). Because in VFS layer the fl->fl_file
> doesn't increase the file's reference counter.
>
> Will switch to use ceph dedicate lock info to track the inode.
>
> And in ceph_fl_release_lock() we should skip all the operations if
> the fl->fl_u.ceph_fl.fl_inode is not set, which should come from
> the request file_lock. And we will set fl->fl_u.ceph_fl.fl_inode when
> inserting it to the inode lock list, which is when copying the lock.
>
> Cc: stable@vger.kernel.org
> URL: https://tracker.ceph.com/issues/57986
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
> fs/ceph/locks.c | 18 +++++++++++++++---
> include/linux/ceph/ceph_fs_fl.h | 26 ++++++++++++++++++++++++++
> include/linux/fs.h | 2 ++
> 3 files changed, 43 insertions(+), 3 deletions(-)
> create mode 100644 include/linux/ceph/ceph_fs_fl.h
>
> diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
> index 3e2843e86e27..d8385dd0076e 100644
> --- a/fs/ceph/locks.c
> +++ b/fs/ceph/locks.c
> @@ -34,22 +34,34 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
> {
> struct ceph_file_info *fi = dst->fl_file->private_data;
> struct inode *inode = file_inode(dst->fl_file);
> +
> atomic_inc(&ceph_inode(inode)->i_filelock_ref);
> atomic_inc(&fi->num_locks);
> + dst->fl_u.ceph_fl.fl_inode = igrab(inode);
> }
>
> static void ceph_fl_release_lock(struct file_lock *fl)
> {
> struct ceph_file_info *fi = fl->fl_file->private_data;
> - struct inode *inode = file_inode(fl->fl_file);
> - struct ceph_inode_info *ci = ceph_inode(inode);
> - atomic_dec(&fi->num_locks);
> + struct inode *inode = fl->fl_u.ceph_fl.fl_inode;
> + struct ceph_inode_info *ci;
> +
> + /*
> + * If inode is NULL it should be a request file_lock,
> + * nothing we can do.
> + */
> + if (!inode)
> + return;
> +
> + ci = ceph_inode(inode);
> if (atomic_dec_and_test(&ci->i_filelock_ref)) {
> /* clear error when all locks are released */
> spin_lock(&ci->i_ceph_lock);
> ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
> spin_unlock(&ci->i_ceph_lock);
> }
> + iput(inode);
> + atomic_dec(&fi->num_locks);
It doesn't look like this fixes the original issue. "fi" may be pointing
to freed memory at this point, right? I think you may need to get rid of
the "num_locks" field in ceph_file_info, and do that in a different way?
> }
>
> static const struct file_lock_operations ceph_fl_lock_ops = {
> diff --git a/include/linux/ceph/ceph_fs_fl.h b/include/linux/ceph/ceph_fs_fl.h
> new file mode 100644
> index 000000000000..02a412b26095
> --- /dev/null
> +++ b/include/linux/ceph/ceph_fs_fl.h
> @@ -0,0 +1,26 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * ceph_fs.h - Ceph constants and data types to share between kernel and
> + * user space.
> + *
> + * Most types in this file are defined as little-endian, and are
> + * primarily intended to describe data structures that pass over the
> + * wire or that are stored on disk.
> + *
> + * LGPL2
> + */
> +
> +#ifndef CEPH_FS_FL_H
> +#define CEPH_FS_FL_H
> +
> +#include <linux/fs.h>
> +
> +/*
> + * Ceph lock info
> + */
> +
> +struct ceph_lock_info {
> + struct inode *fl_inode;
> +};
> +
> +#endif
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index e654435f1651..db4810d19e26 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1066,6 +1066,7 @@ bool opens_in_grace(struct net *);
>
> /* that will die - we need it for nfs_lock_info */
> #include <linux/nfs_fs_i.h>
> +#include <linux/ceph/ceph_fs_fl.h>
>
> /*
> * struct file_lock represents a generic "file lock". It's used to represent
> @@ -1119,6 +1120,7 @@ struct file_lock {
> int state; /* state of grant or error if -ve */
> unsigned int debug_id;
> } afs;
> + struct ceph_lock_info ceph_fl;
> } fl_u;
> } __randomize_layout;
>
On 14/11/2022 19:24, Jeff Layton wrote:
> On Mon, 2022-11-14 at 13:19 +0800, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> When ceph releasing the file_lock it will try to get the inode pointer
>> from the fl->fl_file, which the memory could already be released by
>> another thread in filp_close(). Because in VFS layer the fl->fl_file
>> doesn't increase the file's reference counter.
>>
>> Will switch to use ceph dedicate lock info to track the inode.
>>
>> And in ceph_fl_release_lock() we should skip all the operations if
>> the fl->fl_u.ceph_fl.fl_inode is not set, which should come from
>> the request file_lock. And we will set fl->fl_u.ceph_fl.fl_inode when
>> inserting it to the inode lock list, which is when copying the lock.
>>
>> Cc: stable@vger.kernel.org
>> URL: https://tracker.ceph.com/issues/57986
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>> fs/ceph/locks.c | 18 +++++++++++++++---
>> include/linux/ceph/ceph_fs_fl.h | 26 ++++++++++++++++++++++++++
>> include/linux/fs.h | 2 ++
>> 3 files changed, 43 insertions(+), 3 deletions(-)
>> create mode 100644 include/linux/ceph/ceph_fs_fl.h
>>
>> diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
>> index 3e2843e86e27..d8385dd0076e 100644
>> --- a/fs/ceph/locks.c
>> +++ b/fs/ceph/locks.c
>> @@ -34,22 +34,34 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
>> {
>> struct ceph_file_info *fi = dst->fl_file->private_data;
>> struct inode *inode = file_inode(dst->fl_file);
>> +
>> atomic_inc(&ceph_inode(inode)->i_filelock_ref);
>> atomic_inc(&fi->num_locks);
>> + dst->fl_u.ceph_fl.fl_inode = igrab(inode);
>> }
>>
>> static void ceph_fl_release_lock(struct file_lock *fl)
>> {
>> struct ceph_file_info *fi = fl->fl_file->private_data;
>> - struct inode *inode = file_inode(fl->fl_file);
>> - struct ceph_inode_info *ci = ceph_inode(inode);
>> - atomic_dec(&fi->num_locks);
>> + struct inode *inode = fl->fl_u.ceph_fl.fl_inode;
>> + struct ceph_inode_info *ci;
>> +
>> + /*
>> + * If inode is NULL it should be a request file_lock,
>> + * nothing we can do.
>> + */
>> + if (!inode)
>> + return;
>> +
>> + ci = ceph_inode(inode);
>> if (atomic_dec_and_test(&ci->i_filelock_ref)) {
>> /* clear error when all locks are released */
>> spin_lock(&ci->i_ceph_lock);
>> ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
>> spin_unlock(&ci->i_ceph_lock);
>> }
>> + iput(inode);
>> + atomic_dec(&fi->num_locks);
> It doesn't look like this fixes the original issue. "fi" may be pointing
> to freed memory at this point, right?
Yeah, I didn't fix this in the this patch. I fixed it in a dedicated 2/2
patch.
> I think you may need to get rid of
> the "num_locks" field in ceph_file_info, and do that in a different way?
>
This is a dedicated field for each 'file' struct. I have no idea how to
fix this in a different way yet.
Thanks!
- Xiubo
>> }
>>
>> static const struct file_lock_operations ceph_fl_lock_ops = {
>> diff --git a/include/linux/ceph/ceph_fs_fl.h b/include/linux/ceph/ceph_fs_fl.h
>> new file mode 100644
>> index 000000000000..02a412b26095
>> --- /dev/null
>> +++ b/include/linux/ceph/ceph_fs_fl.h
>> @@ -0,0 +1,26 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * ceph_fs.h - Ceph constants and data types to share between kernel and
>> + * user space.
>> + *
>> + * Most types in this file are defined as little-endian, and are
>> + * primarily intended to describe data structures that pass over the
>> + * wire or that are stored on disk.
>> + *
>> + * LGPL2
>> + */
>> +
>> +#ifndef CEPH_FS_FL_H
>> +#define CEPH_FS_FL_H
>> +
>> +#include <linux/fs.h>
>> +
>> +/*
>> + * Ceph lock info
>> + */
>> +
>> +struct ceph_lock_info {
>> + struct inode *fl_inode;
>> +};
>> +
>> +#endif
>> diff --git a/include/linux/fs.h b/include/linux/fs.h
>> index e654435f1651..db4810d19e26 100644
>> --- a/include/linux/fs.h
>> +++ b/include/linux/fs.h
>> @@ -1066,6 +1066,7 @@ bool opens_in_grace(struct net *);
>>
>> /* that will die - we need it for nfs_lock_info */
>> #include <linux/nfs_fs_i.h>
>> +#include <linux/ceph/ceph_fs_fl.h>
>>
>> /*
>> * struct file_lock represents a generic "file lock". It's used to represent
>> @@ -1119,6 +1120,7 @@ struct file_lock {
>> int state; /* state of grant or error if -ve */
>> unsigned int debug_id;
>> } afs;
>> + struct ceph_lock_info ceph_fl;
>> } fl_u;
>> } __randomize_layout;
>>
@@ -34,22 +34,34 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
{
struct ceph_file_info *fi = dst->fl_file->private_data;
struct inode *inode = file_inode(dst->fl_file);
+
atomic_inc(&ceph_inode(inode)->i_filelock_ref);
atomic_inc(&fi->num_locks);
+ dst->fl_u.ceph_fl.fl_inode = igrab(inode);
}
static void ceph_fl_release_lock(struct file_lock *fl)
{
struct ceph_file_info *fi = fl->fl_file->private_data;
- struct inode *inode = file_inode(fl->fl_file);
- struct ceph_inode_info *ci = ceph_inode(inode);
- atomic_dec(&fi->num_locks);
+ struct inode *inode = fl->fl_u.ceph_fl.fl_inode;
+ struct ceph_inode_info *ci;
+
+ /*
+ * If inode is NULL it should be a request file_lock,
+ * nothing we can do.
+ */
+ if (!inode)
+ return;
+
+ ci = ceph_inode(inode);
if (atomic_dec_and_test(&ci->i_filelock_ref)) {
/* clear error when all locks are released */
spin_lock(&ci->i_ceph_lock);
ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
spin_unlock(&ci->i_ceph_lock);
}
+ iput(inode);
+ atomic_dec(&fi->num_locks);
}
static const struct file_lock_operations ceph_fl_lock_ops = {
new file mode 100644
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ceph_fs.h - Ceph constants and data types to share between kernel and
+ * user space.
+ *
+ * Most types in this file are defined as little-endian, and are
+ * primarily intended to describe data structures that pass over the
+ * wire or that are stored on disk.
+ *
+ * LGPL2
+ */
+
+#ifndef CEPH_FS_FL_H
+#define CEPH_FS_FL_H
+
+#include <linux/fs.h>
+
+/*
+ * Ceph lock info
+ */
+
+struct ceph_lock_info {
+ struct inode *fl_inode;
+};
+
+#endif
@@ -1066,6 +1066,7 @@ bool opens_in_grace(struct net *);
/* that will die - we need it for nfs_lock_info */
#include <linux/nfs_fs_i.h>
+#include <linux/ceph/ceph_fs_fl.h>
/*
* struct file_lock represents a generic "file lock". It's used to represent
@@ -1119,6 +1120,7 @@ struct file_lock {
int state; /* state of grant or error if -ve */
unsigned int debug_id;
} afs;
+ struct ceph_lock_info ceph_fl;
} fl_u;
} __randomize_layout;