[v4,06/11] block: Add atomic write support for statx
Commit Message
From: Prasad Singamsetty <prasad.singamsetty@oracle.com>
Extend statx system call to return additional info for atomic write support
support if the specified file is a block device.
Signed-off-by: Prasad Singamsetty <prasad.singamsetty@oracle.com>
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
block/bdev.c | 37 +++++++++++++++++++++++++++----------
fs/stat.c | 13 ++++++-------
include/linux/blkdev.h | 5 +++--
3 files changed, 36 insertions(+), 19 deletions(-)
Comments
> +#define BDEV_STATX_SUPPORTED_MASK (STATX_DIOALIGN | STATX_WRITE_ATOMIC)
> + if (!(request_mask & BDEV_STATX_SUPPORTED_MASK))
> + return;
BDEV_STATX_SUPPORTED_MASK is misleading here. bdevs support a lot more
fields, these are just the ones needing special attention. I'd do away
with the extra define and just open code it.
> + /* If this is a block device inode, override the filesystem
> + * attributes with the block device specific parameters
> + * that need to be obtained from the bdev backing inode
> + */
This is not the normal kernel multi-line comment format.
> + if (S_ISBLK(d_backing_inode(path.dentry)->i_mode))
> + bdev_statx(path.dentry, stat, request_mask);
I know I touched this last, but does anyone remember why we have
various random fixups in vfs_statx and not in vfs_getattr_nosec, where
they we have more of them and also the inode at hand?
On 20/02/2024 08:29, Christoph Hellwig wrote:
>> +#define BDEV_STATX_SUPPORTED_MASK (STATX_DIOALIGN | STATX_WRITE_ATOMIC)
>
>> + if (!(request_mask & BDEV_STATX_SUPPORTED_MASK))
>> + return;
>
> BDEV_STATX_SUPPORTED_MASK is misleading here. bdevs support a lot more
> fields, these are just the ones needing special attention. I'd do away
> with the extra define and just open code it.
ok, fine
>
>> + /* If this is a block device inode, override the filesystem
>> + * attributes with the block device specific parameters
>> + * that need to be obtained from the bdev backing inode
>> + */
>
> This is not the normal kernel multi-line comment format.
>
will fix
>> + if (S_ISBLK(d_backing_inode(path.dentry)->i_mode))
>> + bdev_statx(path.dentry, stat, request_mask);
>
> I know I touched this last, but does anyone remember why we have
> various random fixups in vfs_statx and not in vfs_getattr_nosec, where
> they we have more of them and also the inode at hand?
John Garry <john.g.garry@oracle.com> writes:
> From: Prasad Singamsetty <prasad.singamsetty@oracle.com>
>
> Extend statx system call to return additional info for atomic write support
> support if the specified file is a block device.
>
> Signed-off-by: Prasad Singamsetty <prasad.singamsetty@oracle.com>
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
> block/bdev.c | 37 +++++++++++++++++++++++++++----------
> fs/stat.c | 13 ++++++-------
> include/linux/blkdev.h | 5 +++--
> 3 files changed, 36 insertions(+), 19 deletions(-)
>
> diff --git a/block/bdev.c b/block/bdev.c
> index e9f1b12bd75c..0dada9902bd4 100644
> --- a/block/bdev.c
> +++ b/block/bdev.c
> @@ -1116,24 +1116,41 @@ void sync_bdevs(bool wait)
> iput(old_inode);
> }
>
> +#define BDEV_STATX_SUPPORTED_MASK (STATX_DIOALIGN | STATX_WRITE_ATOMIC)
> +
> /*
> - * Handle STATX_DIOALIGN for block devices.
> - *
> - * Note that the inode passed to this is the inode of a block device node file,
> - * not the block device's internal inode. Therefore it is *not* valid to use
> - * I_BDEV() here; the block device has to be looked up by i_rdev instead.
> + * Handle STATX_{DIOALIGN, WRITE_ATOMIC} for block devices.
> */
> -void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
> +void bdev_statx(struct dentry *dentry, struct kstat *stat, u32 request_mask)
why change this to dentry? Why not keep it as inode itself?
-ritesh
> {
> struct block_device *bdev;
>
> - bdev = blkdev_get_no_open(inode->i_rdev);
> + if (!(request_mask & BDEV_STATX_SUPPORTED_MASK))
> + return;
> +
> + /*
> + * Note that d_backing_inode() returns the inode of a block device node
> + * file, not the block device's internal inode. Therefore it is *not*
> + * valid to use I_BDEV() here; the block device has to be looked up by
> + * i_rdev instead.
> + */
> + bdev = blkdev_get_no_open(d_backing_inode(dentry)->i_rdev);
> if (!bdev)
> return;
>
> - stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
> - stat->dio_offset_align = bdev_logical_block_size(bdev);
> - stat->result_mask |= STATX_DIOALIGN;
> + if (request_mask & STATX_DIOALIGN) {
> + stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
> + stat->dio_offset_align = bdev_logical_block_size(bdev);
> + stat->result_mask |= STATX_DIOALIGN;
> + }
> +
> + if (request_mask & STATX_WRITE_ATOMIC && bdev_can_atomic_write(bdev)) {
> + struct request_queue *bd_queue = bdev->bd_queue;
> +
> + generic_fill_statx_atomic_writes(stat,
> + queue_atomic_write_unit_min_bytes(bd_queue),
> + queue_atomic_write_unit_max_bytes(bd_queue));
> + }
>
> blkdev_put_no_open(bdev);
> }
> diff --git a/fs/stat.c b/fs/stat.c
> index 522787a4ab6a..bd0618477702 100644
> --- a/fs/stat.c
> +++ b/fs/stat.c
> @@ -290,13 +290,12 @@ static int vfs_statx(int dfd, struct filename *filename, int flags,
> stat->attributes |= STATX_ATTR_MOUNT_ROOT;
> stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT;
>
> - /* Handle STATX_DIOALIGN for block devices. */
> - if (request_mask & STATX_DIOALIGN) {
> - struct inode *inode = d_backing_inode(path.dentry);
> -
> - if (S_ISBLK(inode->i_mode))
> - bdev_statx_dioalign(inode, stat);
> - }
> + /* If this is a block device inode, override the filesystem
> + * attributes with the block device specific parameters
> + * that need to be obtained from the bdev backing inode
> + */
> + if (S_ISBLK(d_backing_inode(path.dentry)->i_mode))
> + bdev_statx(path.dentry, stat, request_mask);
>
> path_put(&path);
> if (retry_estale(error, lookup_flags)) {
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 40ed56ef4937..4f04456f1250 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -1541,7 +1541,7 @@ int sync_blockdev(struct block_device *bdev);
> int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend);
> int sync_blockdev_nowait(struct block_device *bdev);
> void sync_bdevs(bool wait);
> -void bdev_statx_dioalign(struct inode *inode, struct kstat *stat);
> +void bdev_statx(struct dentry *dentry, struct kstat *stat, u32 request_mask);
> void printk_all_partitions(void);
> int __init early_lookup_bdev(const char *pathname, dev_t *dev);
> #else
> @@ -1559,7 +1559,8 @@ static inline int sync_blockdev_nowait(struct block_device *bdev)
> static inline void sync_bdevs(bool wait)
> {
> }
> -static inline void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
> +static inline void bdev_statx(struct dentry *dentry, struct kstat *stat,
> + u32 request_mask)
> {
> }
> static inline void printk_all_partitions(void)
> --
> 2.31.1
On 25/02/2024 14:20, Ritesh Harjani (IBM) wrote:
> John Garry <john.g.garry@oracle.com> writes:
>
>> From: Prasad Singamsetty <prasad.singamsetty@oracle.com>
>>
>> Extend statx system call to return additional info for atomic write support
>> support if the specified file is a block device.
>>
>> Signed-off-by: Prasad Singamsetty <prasad.singamsetty@oracle.com>
>> Signed-off-by: John Garry <john.g.garry@oracle.com>
>> ---
>> block/bdev.c | 37 +++++++++++++++++++++++++++----------
>> fs/stat.c | 13 ++++++-------
>> include/linux/blkdev.h | 5 +++--
>> 3 files changed, 36 insertions(+), 19 deletions(-)
>>
>> diff --git a/block/bdev.c b/block/bdev.c
>> index e9f1b12bd75c..0dada9902bd4 100644
>> --- a/block/bdev.c
>> +++ b/block/bdev.c
>> @@ -1116,24 +1116,41 @@ void sync_bdevs(bool wait)
>> iput(old_inode);
>> }
>>
>> +#define BDEV_STATX_SUPPORTED_MASK (STATX_DIOALIGN | STATX_WRITE_ATOMIC)
>> +
>> /*
>> - * Handle STATX_DIOALIGN for block devices.
>> - *
>> - * Note that the inode passed to this is the inode of a block device node file,
>> - * not the block device's internal inode. Therefore it is *not* valid to use
>> - * I_BDEV() here; the block device has to be looked up by i_rdev instead.
>> + * Handle STATX_{DIOALIGN, WRITE_ATOMIC} for block devices.
>> */
>> -void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
>> +void bdev_statx(struct dentry *dentry, struct kstat *stat, u32 request_mask)
>
> why change this to dentry? Why not keep it as inode itself?
I suppose that I could do that.
Thanks,
John
@@ -1116,24 +1116,41 @@ void sync_bdevs(bool wait)
iput(old_inode);
}
+#define BDEV_STATX_SUPPORTED_MASK (STATX_DIOALIGN | STATX_WRITE_ATOMIC)
+
/*
- * Handle STATX_DIOALIGN for block devices.
- *
- * Note that the inode passed to this is the inode of a block device node file,
- * not the block device's internal inode. Therefore it is *not* valid to use
- * I_BDEV() here; the block device has to be looked up by i_rdev instead.
+ * Handle STATX_{DIOALIGN, WRITE_ATOMIC} for block devices.
*/
-void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
+void bdev_statx(struct dentry *dentry, struct kstat *stat, u32 request_mask)
{
struct block_device *bdev;
- bdev = blkdev_get_no_open(inode->i_rdev);
+ if (!(request_mask & BDEV_STATX_SUPPORTED_MASK))
+ return;
+
+ /*
+ * Note that d_backing_inode() returns the inode of a block device node
+ * file, not the block device's internal inode. Therefore it is *not*
+ * valid to use I_BDEV() here; the block device has to be looked up by
+ * i_rdev instead.
+ */
+ bdev = blkdev_get_no_open(d_backing_inode(dentry)->i_rdev);
if (!bdev)
return;
- stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
- stat->dio_offset_align = bdev_logical_block_size(bdev);
- stat->result_mask |= STATX_DIOALIGN;
+ if (request_mask & STATX_DIOALIGN) {
+ stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
+ stat->dio_offset_align = bdev_logical_block_size(bdev);
+ stat->result_mask |= STATX_DIOALIGN;
+ }
+
+ if (request_mask & STATX_WRITE_ATOMIC && bdev_can_atomic_write(bdev)) {
+ struct request_queue *bd_queue = bdev->bd_queue;
+
+ generic_fill_statx_atomic_writes(stat,
+ queue_atomic_write_unit_min_bytes(bd_queue),
+ queue_atomic_write_unit_max_bytes(bd_queue));
+ }
blkdev_put_no_open(bdev);
}
@@ -290,13 +290,12 @@ static int vfs_statx(int dfd, struct filename *filename, int flags,
stat->attributes |= STATX_ATTR_MOUNT_ROOT;
stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT;
- /* Handle STATX_DIOALIGN for block devices. */
- if (request_mask & STATX_DIOALIGN) {
- struct inode *inode = d_backing_inode(path.dentry);
-
- if (S_ISBLK(inode->i_mode))
- bdev_statx_dioalign(inode, stat);
- }
+ /* If this is a block device inode, override the filesystem
+ * attributes with the block device specific parameters
+ * that need to be obtained from the bdev backing inode
+ */
+ if (S_ISBLK(d_backing_inode(path.dentry)->i_mode))
+ bdev_statx(path.dentry, stat, request_mask);
path_put(&path);
if (retry_estale(error, lookup_flags)) {
@@ -1541,7 +1541,7 @@ int sync_blockdev(struct block_device *bdev);
int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend);
int sync_blockdev_nowait(struct block_device *bdev);
void sync_bdevs(bool wait);
-void bdev_statx_dioalign(struct inode *inode, struct kstat *stat);
+void bdev_statx(struct dentry *dentry, struct kstat *stat, u32 request_mask);
void printk_all_partitions(void);
int __init early_lookup_bdev(const char *pathname, dev_t *dev);
#else
@@ -1559,7 +1559,8 @@ static inline int sync_blockdev_nowait(struct block_device *bdev)
static inline void sync_bdevs(bool wait)
{
}
-static inline void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
+static inline void bdev_statx(struct dentry *dentry, struct kstat *stat,
+ u32 request_mask)
{
}
static inline void printk_all_partitions(void)