[v2,5/7] fs: FS_IOC_GETSYSFSNAME
Commit Message
Add a new ioctl for getting the sysfs name of a filesystem - the path
under /sys/fs.
This is going to let us standardize exporting data from sysfs across
filesystems, e.g. time stats.
The returned path will always be of the form "$FSTYP/$SYSFS_IDENTIFIER",
where the sysfs identifier may be a UUID (for bcachefs) or a device name
(xfs).
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Dave Chinner <dchinner@redhat.com>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/ioctl.c | 17 +++++++++++++++++
include/linux/fs.h | 1 +
include/uapi/linux/fs.h | 10 ++++++++++
3 files changed, 28 insertions(+)
Comments
On Tue, Feb 06, 2024 at 03:18:53PM -0500, Kent Overstreet wrote:
> Add a new ioctl for getting the sysfs name of a filesystem - the path
> under /sys/fs.
>
> This is going to let us standardize exporting data from sysfs across
> filesystems, e.g. time stats.
>
> The returned path will always be of the form "$FSTYP/$SYSFS_IDENTIFIER",
> where the sysfs identifier may be a UUID (for bcachefs) or a device name
> (xfs).
>
> Cc: Christian Brauner <brauner@kernel.org>
> Cc: Jan Kara <jack@suse.cz>
> Cc: Dave Chinner <dchinner@redhat.com>
> Cc: "Darrick J. Wong" <djwong@kernel.org>
> Cc: Theodore Ts'o <tytso@mit.edu>
> Cc: Josef Bacik <josef@toxicpanda.com>
> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> ---
> fs/ioctl.c | 17 +++++++++++++++++
> include/linux/fs.h | 1 +
> include/uapi/linux/fs.h | 10 ++++++++++
> 3 files changed, 28 insertions(+)
>
> diff --git a/fs/ioctl.c b/fs/ioctl.c
> index 046c30294a82..7c37765bd04e 100644
> --- a/fs/ioctl.c
> +++ b/fs/ioctl.c
> @@ -776,6 +776,20 @@ static int ioctl_getfsuuid(struct file *file, void __user *argp)
> return copy_to_user(argp, &u, sizeof(u)) ? -EFAULT : 0;
> }
>
> +static int ioctl_get_fs_sysfs_path(struct file *file, void __user *argp)
> +{
> + struct super_block *sb = file_inode(file)->i_sb;
> +
> + if (!strlen(sb->s_sysfs_name))
> + return -ENOIOCTLCMD;
This relies on the kernel developers getting string termination
right and not overflowing buffers.
We can do better, I think, and I suspect that starts with a
super_set_sysfs_name() helper....
> + struct fssysfspath u = {};
Variable names that look like the cat just walked over the keyboard
are difficult to read. Please use some separators here!
Also, same comment as previous about mixing code and declarations.
> +
> + snprintf(u.name, sizeof(u.name), "%s/%s", sb->s_type->name, sb->s_sysfs_name);
What happens if the combined path overflows the fssysfspath
buffer?
> +
> + return copy_to_user(argp, &u, sizeof(u)) ? -EFAULT : 0;
> +}
> +
> /*
> * do_vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d.
> * It's just a simple helper for sys_ioctl and compat_sys_ioctl.
> @@ -861,6 +875,9 @@ static int do_vfs_ioctl(struct file *filp, unsigned int fd,
> case FS_IOC_GETFSUUID:
> return ioctl_getfsuuid(filp, argp);
>
> + case FS_IOC_GETFSSYSFSPATH:
> + return ioctl_get_fs_sysfs_path(filp, argp);
> +
> default:
> if (S_ISREG(inode->i_mode))
> return file_ioctl(filp, cmd, argp);
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index acdc56987cb1..b96c1d009718 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1258,6 +1258,7 @@ struct super_block {
> char s_id[32]; /* Informational name */
> uuid_t s_uuid; /* UUID */
> u8 s_uuid_len; /* Default 16, possibly smaller for weird filesystems */
> + char s_sysfs_name[UUID_STRING_LEN + 1];
Can we just kstrdup() the sysfs name and keep a {ptr, len} pair
in the sb for this? Then we can treat them as an opaque identifier
that isn't actually a string, and we don't have to make up some
arbitrary maximum name size, either.
> unsigned int s_max_links;
>
> diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
> index 16a6ecadfd8d..c0f7bd4b6350 100644
> --- a/include/uapi/linux/fs.h
> +++ b/include/uapi/linux/fs.h
> @@ -77,6 +77,10 @@ struct fsuuid2 {
> __u8 uuid[16];
> };
>
> +struct fssysfspath {
> + __u8 name[128];
> +};
Undocumented magic number in a UAPI. Why was this chosen?
IMO, we shouldn't be returning strings that require the the kernel
to place NULLs correctly and for the caller to detect said NULLs to
determine their length, so something like:
struct fs_sysfs_path {
__u32 name_len;
__u8 name[NAME_MAX];
};
Seems better to me...
-Dave.
On Wed, Feb 07, 2024 at 09:26:40AM +1100, Dave Chinner wrote:
> On Tue, Feb 06, 2024 at 03:18:53PM -0500, Kent Overstreet wrote:
> > Add a new ioctl for getting the sysfs name of a filesystem - the path
> > under /sys/fs.
> >
> > This is going to let us standardize exporting data from sysfs across
> > filesystems, e.g. time stats.
> >
> > The returned path will always be of the form "$FSTYP/$SYSFS_IDENTIFIER",
> > where the sysfs identifier may be a UUID (for bcachefs) or a device name
> > (xfs).
> >
> > Cc: Christian Brauner <brauner@kernel.org>
> > Cc: Jan Kara <jack@suse.cz>
> > Cc: Dave Chinner <dchinner@redhat.com>
> > Cc: "Darrick J. Wong" <djwong@kernel.org>
> > Cc: Theodore Ts'o <tytso@mit.edu>
> > Cc: Josef Bacik <josef@toxicpanda.com>
> > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> > ---
> > fs/ioctl.c | 17 +++++++++++++++++
> > include/linux/fs.h | 1 +
> > include/uapi/linux/fs.h | 10 ++++++++++
> > 3 files changed, 28 insertions(+)
> >
> > diff --git a/fs/ioctl.c b/fs/ioctl.c
> > index 046c30294a82..7c37765bd04e 100644
> > --- a/fs/ioctl.c
> > +++ b/fs/ioctl.c
> > @@ -776,6 +776,20 @@ static int ioctl_getfsuuid(struct file *file, void __user *argp)
> > return copy_to_user(argp, &u, sizeof(u)) ? -EFAULT : 0;
> > }
> >
> > +static int ioctl_get_fs_sysfs_path(struct file *file, void __user *argp)
> > +{
> > + struct super_block *sb = file_inode(file)->i_sb;
> > +
> > + if (!strlen(sb->s_sysfs_name))
> > + return -ENOIOCTLCMD;
>
> This relies on the kernel developers getting string termination
> right and not overflowing buffers.
>
> We can do better, I think, and I suspect that starts with a
> super_set_sysfs_name() helper....
I don't think that's needed here; a standard snprintf() ensures that the
buffer is null terminated, even if the result overflowed.
> > + struct fssysfspath u = {};
>
> Variable names that look like the cat just walked over the keyboard
> are difficult to read. Please use some separators here!
>
> Also, same comment as previous about mixing code and declarations.
>
> > +
> > + snprintf(u.name, sizeof(u.name), "%s/%s", sb->s_type->name, sb->s_sysfs_name);
>
> What happens if the combined path overflows the fssysfspath
> buffer?
It won't; s_sysfs_name is going to be either a UUID or a short device
name. It can't be a longer device name (like we show in /proc/mounts) -
here that would lead to ambiguouty.
> > + char s_sysfs_name[UUID_STRING_LEN + 1];
>
> Can we just kstrdup() the sysfs name and keep a {ptr, len} pair
> in the sb for this? Then we can treat them as an opaque identifier
> that isn't actually a string, and we don't have to make up some
> arbitrary maximum name size, either.
What if we went in a different direction - take your previous suggestion
to have a helper for setting the name, and then enforce through the API
that the only valid identifiers are a UUID or a short device name.
super_set_sysfs_identifier_uuid(sb);
super_set_sysfs_identifier_device(sb);
then we can enforce that the identifier comes from either sb->s_uuid or
sb->s_dev.
I'll have to check existing filesystems before we commit to that,
though.
>
> > unsigned int s_max_links;
> >
> > diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
> > index 16a6ecadfd8d..c0f7bd4b6350 100644
> > --- a/include/uapi/linux/fs.h
> > +++ b/include/uapi/linux/fs.h
> > @@ -77,6 +77,10 @@ struct fsuuid2 {
> > __u8 uuid[16];
> > };
> >
> > +struct fssysfspath {
> > + __u8 name[128];
> > +};
>
> Undocumented magic number in a UAPI. Why was this chosen?
In this case, I think the magic number is fine - though it could use a
comment; since it only needs to be used in one place giving it a name is
a bit pointless.
As to why it was chosen - 64 is the next power of two size up from the
length of a uuid string length, and 64 should fit any UUID + filesystem
name. So took that and doubled it.
> IMO, we shouldn't be returning strings that require the the kernel
> to place NULLs correctly and for the caller to detect said NULLs to
> determine their length, so something like:
>
> struct fs_sysfs_path {
> __u32 name_len;
> __u8 name[NAME_MAX];
> };
>
> Seems better to me...
I suppose modern languages are getting away from the whole
nul-terminated string thing, heh
@@ -776,6 +776,20 @@ static int ioctl_getfsuuid(struct file *file, void __user *argp)
return copy_to_user(argp, &u, sizeof(u)) ? -EFAULT : 0;
}
+static int ioctl_get_fs_sysfs_path(struct file *file, void __user *argp)
+{
+ struct super_block *sb = file_inode(file)->i_sb;
+
+ if (!strlen(sb->s_sysfs_name))
+ return -ENOIOCTLCMD;
+
+ struct fssysfspath u = {};
+
+ snprintf(u.name, sizeof(u.name), "%s/%s", sb->s_type->name, sb->s_sysfs_name);
+
+ return copy_to_user(argp, &u, sizeof(u)) ? -EFAULT : 0;
+}
+
/*
* do_vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d.
* It's just a simple helper for sys_ioctl and compat_sys_ioctl.
@@ -861,6 +875,9 @@ static int do_vfs_ioctl(struct file *filp, unsigned int fd,
case FS_IOC_GETFSUUID:
return ioctl_getfsuuid(filp, argp);
+ case FS_IOC_GETFSSYSFSPATH:
+ return ioctl_get_fs_sysfs_path(filp, argp);
+
default:
if (S_ISREG(inode->i_mode))
return file_ioctl(filp, cmd, argp);
@@ -1258,6 +1258,7 @@ struct super_block {
char s_id[32]; /* Informational name */
uuid_t s_uuid; /* UUID */
u8 s_uuid_len; /* Default 16, possibly smaller for weird filesystems */
+ char s_sysfs_name[UUID_STRING_LEN + 1];
unsigned int s_max_links;
@@ -77,6 +77,10 @@ struct fsuuid2 {
__u8 uuid[16];
};
+struct fssysfspath {
+ __u8 name[128];
+};
+
/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
#define FILE_DEDUPE_RANGE_SAME 0
#define FILE_DEDUPE_RANGE_DIFFERS 1
@@ -206,6 +210,12 @@ struct fsxattr {
/* Returns the external filesystem UUID, the same one blkid returns */
#define FS_IOC_GETFSUUID _IOR(0x12, 142, struct fsuuid2)
+/*
+ * Returns the path component under /sys/fs/ that refers to this filesystem;
+ * also /sys/kernel/debug/ for filesystems with debugfs exports
+ */
+#define FS_IOC_GETFSSYSFSPATH _IOR(0x12, 143, struct fssysfspath)
+
#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
#define FIBMAP _IO(0x00,1) /* bmap access */
#define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */