[v13,1/9] block: Introduce queue limits for copy-offload support
Commit Message
Add device limits as sysfs entries,
- copy_offload (RW)
- copy_max_bytes (RW)
- copy_max_bytes_hw (RO)
Above limits help to split the copy payload in block layer.
copy_offload: used for setting copy offload(1) or emulation(0).
copy_max_bytes: maximum total length of copy in single payload.
copy_max_bytes_hw: Reflects the device supported maximum limit.
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
---
Documentation/ABI/stable/sysfs-block | 33 +++++++++++++++
block/blk-settings.c | 24 +++++++++++
block/blk-sysfs.c | 63 ++++++++++++++++++++++++++++
include/linux/blkdev.h | 12 ++++++
include/uapi/linux/fs.h | 3 ++
5 files changed, 135 insertions(+)
Comments
On 6/28/23 03:36, Nitesh Shetty wrote:
> Add device limits as sysfs entries,
> - copy_offload (RW)
> - copy_max_bytes (RW)
> - copy_max_bytes_hw (RO)
>
> Above limits help to split the copy payload in block layer.
> copy_offload: used for setting copy offload(1) or emulation(0).
> copy_max_bytes: maximum total length of copy in single payload.
> copy_max_bytes_hw: Reflects the device supported maximum limit.
>
> Reviewed-by: Hannes Reinecke <hare@suse.de>
> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
> Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
> ---
> Documentation/ABI/stable/sysfs-block | 33 +++++++++++++++
> block/blk-settings.c | 24 +++++++++++
> block/blk-sysfs.c | 63 ++++++++++++++++++++++++++++
> include/linux/blkdev.h | 12 ++++++
> include/uapi/linux/fs.h | 3 ++
> 5 files changed, 135 insertions(+)
>
> diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block
> index c57e5b7cb532..3c97303f658b 100644
> --- a/Documentation/ABI/stable/sysfs-block
> +++ b/Documentation/ABI/stable/sysfs-block
> @@ -155,6 +155,39 @@ Description:
> last zone of the device which may be smaller.
>
>
> +What: /sys/block/<disk>/queue/copy_offload
> +Date: June 2023
> +Contact: linux-block@vger.kernel.org
> +Description:
> + [RW] When read, this file shows whether offloading copy to a
> + device is enabled (1) or disabled (0). Writing '0' to this
> + file will disable offloading copies for this device.
> + Writing any '1' value will enable this feature. If the device
> + does not support offloading, then writing 1, will result in an
> + error.
I am still not convinced that this one is really necessary. copy_max_bytes_hw !=
0 indicates that the devices supports copy offload. And setting copy_max_bytes
to 0 can be used to disable copy offload (which probably should be the default
for now).
> +
> +
> +What: /sys/block/<disk>/queue/copy_max_bytes
> +Date: June 2023
> +Contact: linux-block@vger.kernel.org
> +Description:
> + [RW] This is the maximum number of bytes that the block layer
> + will allow for a copy request. This will is always smaller or
will is -> is
> + equal to the maximum size allowed by the hardware, indicated by
> + 'copy_max_bytes_hw'. An attempt to set a value higher than
> + 'copy_max_bytes_hw' will truncate this to 'copy_max_bytes_hw'.
> +
> +
> +What: /sys/block/<disk>/queue/copy_max_bytes_hw
Nit: In keeping with the spirit of attributes like
max_hw_sectors_kb/max_sectors_kb, I would call this one copy_max_hw_bytes.
> +Date: June 2023
> +Contact: linux-block@vger.kernel.org
> +Description:
> + [RO] This is the maximum number of bytes that the hardware
> + will allow for single data copy request.
> + A value of 0 means that the device does not support
> + copy offload.
> +
> +
> What: /sys/block/<disk>/queue/crypto/
> Date: February 2022
> Contact: linux-block@vger.kernel.org
> diff --git a/block/blk-settings.c b/block/blk-settings.c
> index 4dd59059b788..738cd3f21259 100644
> --- a/block/blk-settings.c
> +++ b/block/blk-settings.c
> @@ -59,6 +59,8 @@ void blk_set_default_limits(struct queue_limits *lim)
> lim->zoned = BLK_ZONED_NONE;
> lim->zone_write_granularity = 0;
> lim->dma_alignment = 511;
> + lim->max_copy_sectors_hw = 0;
> + lim->max_copy_sectors = 0;
> }
>
> /**
> @@ -82,6 +84,8 @@ void blk_set_stacking_limits(struct queue_limits *lim)
> lim->max_dev_sectors = UINT_MAX;
> lim->max_write_zeroes_sectors = UINT_MAX;
> lim->max_zone_append_sectors = UINT_MAX;
> + lim->max_copy_sectors_hw = UINT_MAX;
> + lim->max_copy_sectors = UINT_MAX;
> }
> EXPORT_SYMBOL(blk_set_stacking_limits);
>
> @@ -183,6 +187,22 @@ void blk_queue_max_discard_sectors(struct request_queue *q,
> }
> EXPORT_SYMBOL(blk_queue_max_discard_sectors);
>
> +/**
> + * blk_queue_max_copy_sectors_hw - set max sectors for a single copy payload
> + * @q: the request queue for the device
> + * @max_copy_sectors: maximum number of sectors to copy
> + **/
> +void blk_queue_max_copy_sectors_hw(struct request_queue *q,
> + unsigned int max_copy_sectors)
> +{
> + if (max_copy_sectors > (COPY_MAX_BYTES >> SECTOR_SHIFT))
> + max_copy_sectors = COPY_MAX_BYTES >> SECTOR_SHIFT;
> +
> + q->limits.max_copy_sectors_hw = max_copy_sectors;
> + q->limits.max_copy_sectors = max_copy_sectors;
> +}
> +EXPORT_SYMBOL_GPL(blk_queue_max_copy_sectors_hw);
> +
> /**
> * blk_queue_max_secure_erase_sectors - set max sectors for a secure erase
> * @q: the request queue for the device
> @@ -578,6 +598,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
> t->max_segment_size = min_not_zero(t->max_segment_size,
> b->max_segment_size);
>
> + t->max_copy_sectors = min(t->max_copy_sectors, b->max_copy_sectors);
> + t->max_copy_sectors_hw = min(t->max_copy_sectors_hw,
> + b->max_copy_sectors_hw);
> +
> t->misaligned |= b->misaligned;
>
> alignment = queue_limit_alignment_offset(b, start);
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index afc797fb0dfc..43551778d035 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -199,6 +199,62 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag
> return queue_var_show(0, page);
> }
>
> +static ssize_t queue_copy_offload_show(struct request_queue *q, char *page)
> +{
> + return queue_var_show(blk_queue_copy(q), page);
> +}
> +
> +static ssize_t queue_copy_offload_store(struct request_queue *q,
> + const char *page, size_t count)
> +{
> + unsigned long copy_offload;
> + ssize_t ret = queue_var_store(©_offload, page, count);
> +
> + if (ret < 0)
> + return ret;
> +
> + if (copy_offload && !q->limits.max_copy_sectors_hw)
> + return -EINVAL;
> +
> + if (copy_offload)
> + blk_queue_flag_set(QUEUE_FLAG_COPY, q);
> + else
> + blk_queue_flag_clear(QUEUE_FLAG_COPY, q);
> +
> + return count;
> +}
See above. I think we can drop this attribute.
> +
> +static ssize_t queue_copy_max_hw_show(struct request_queue *q, char *page)
> +{
> + return sprintf(page, "%llu\n", (unsigned long long)
> + q->limits.max_copy_sectors_hw << SECTOR_SHIFT);
> +}
> +
> +static ssize_t queue_copy_max_show(struct request_queue *q, char *page)
> +{
> + return sprintf(page, "%llu\n", (unsigned long long)
> + q->limits.max_copy_sectors << SECTOR_SHIFT);
> +}
> +
> +static ssize_t queue_copy_max_store(struct request_queue *q,
> + const char *page, size_t count)
> +{
> + unsigned long max_copy;
> + ssize_t ret = queue_var_store(&max_copy, page, count);
> +
> + if (ret < 0)
> + return ret;
> +
> + if (max_copy & (queue_logical_block_size(q) - 1))
> + return -EINVAL;
> +
> + max_copy >>= SECTOR_SHIFT;
> + q->limits.max_copy_sectors = min_t(unsigned int, max_copy,
> + q->limits.max_copy_sectors_hw);
> +
> + return count;
> +}
> +
> static ssize_t queue_write_same_max_show(struct request_queue *q, char *page)
> {
> return queue_var_show(0, page);
> @@ -522,6 +578,10 @@ QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones");
> QUEUE_RO_ENTRY(queue_max_open_zones, "max_open_zones");
> QUEUE_RO_ENTRY(queue_max_active_zones, "max_active_zones");
>
> +QUEUE_RW_ENTRY(queue_copy_offload, "copy_offload");
> +QUEUE_RO_ENTRY(queue_copy_max_hw, "copy_max_bytes_hw");
> +QUEUE_RW_ENTRY(queue_copy_max, "copy_max_bytes");
> +
> QUEUE_RW_ENTRY(queue_nomerges, "nomerges");
> QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity");
> QUEUE_RW_ENTRY(queue_poll, "io_poll");
> @@ -638,6 +698,9 @@ static struct attribute *queue_attrs[] = {
> &queue_discard_max_entry.attr,
> &queue_discard_max_hw_entry.attr,
> &queue_discard_zeroes_data_entry.attr,
> + &queue_copy_offload_entry.attr,
> + &queue_copy_max_hw_entry.attr,
> + &queue_copy_max_entry.attr,
> &queue_write_same_max_entry.attr,
> &queue_write_zeroes_max_entry.attr,
> &queue_zone_append_max_entry.attr,
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index ed44a997f629..6098665953e6 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -309,6 +309,9 @@ struct queue_limits {
> unsigned int discard_alignment;
> unsigned int zone_write_granularity;
>
> + unsigned int max_copy_sectors_hw;
> + unsigned int max_copy_sectors;
> +
> unsigned short max_segments;
> unsigned short max_integrity_segments;
> unsigned short max_discard_segments;
> @@ -554,6 +557,7 @@ struct request_queue {
> #define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */
> #define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */
> #define QUEUE_FLAG_SKIP_TAGSET_QUIESCE 31 /* quiesce_tagset skip the queue*/
> +#define QUEUE_FLAG_COPY 32 /* enable/disable device copy offload */
>
> #define QUEUE_FLAG_MQ_DEFAULT ((1UL << QUEUE_FLAG_IO_STAT) | \
> (1UL << QUEUE_FLAG_SAME_COMP) | \
> @@ -574,6 +578,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
> test_bit(QUEUE_FLAG_STABLE_WRITES, &(q)->queue_flags)
> #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
> #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
> +#define blk_queue_copy(q) test_bit(QUEUE_FLAG_COPY, &(q)->queue_flags)
> #define blk_queue_zone_resetall(q) \
> test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags)
> #define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
> @@ -891,6 +896,8 @@ extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int);
> extern void blk_queue_max_segments(struct request_queue *, unsigned short);
> extern void blk_queue_max_discard_segments(struct request_queue *,
> unsigned short);
> +extern void blk_queue_max_copy_sectors_hw(struct request_queue *q,
> + unsigned int max_copy_sectors);
> void blk_queue_max_secure_erase_sectors(struct request_queue *q,
> unsigned int max_sectors);
> extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
> @@ -1210,6 +1217,11 @@ static inline unsigned int bdev_discard_granularity(struct block_device *bdev)
> return bdev_get_queue(bdev)->limits.discard_granularity;
> }
>
> +static inline unsigned int bdev_max_copy_sectors(struct block_device *bdev)
> +{
> + return bdev_get_queue(bdev)->limits.max_copy_sectors;
> +}
> +
> static inline unsigned int
> bdev_max_secure_erase_sectors(struct block_device *bdev)
> {
> diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
> index b7b56871029c..dff56813f95a 100644
> --- a/include/uapi/linux/fs.h
> +++ b/include/uapi/linux/fs.h
> @@ -64,6 +64,9 @@ struct fstrim_range {
> __u64 minlen;
> };
>
> +/* maximum copy offload length, this is set to 128MB based on current testing */
> +#define COPY_MAX_BYTES (1 << 27)
> +
> /* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
> #define FILE_DEDUPE_RANGE_SAME 0
> #define FILE_DEDUPE_RANGE_DIFFERS 1
On 23/06/28 03:40PM, Damien Le Moal wrote:
>On 6/28/23 03:36, Nitesh Shetty wrote:
>> Add device limits as sysfs entries,
>> - copy_offload (RW)
>> - copy_max_bytes (RW)
>> - copy_max_bytes_hw (RO)
>>
>> Above limits help to split the copy payload in block layer.
>> copy_offload: used for setting copy offload(1) or emulation(0).
>> copy_max_bytes: maximum total length of copy in single payload.
>> copy_max_bytes_hw: Reflects the device supported maximum limit.
>>
>> Reviewed-by: Hannes Reinecke <hare@suse.de>
>> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
>> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
>> Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
>> ---
>> Documentation/ABI/stable/sysfs-block | 33 +++++++++++++++
>> block/blk-settings.c | 24 +++++++++++
>> block/blk-sysfs.c | 63 ++++++++++++++++++++++++++++
>> include/linux/blkdev.h | 12 ++++++
>> include/uapi/linux/fs.h | 3 ++
>> 5 files changed, 135 insertions(+)
>>
>> diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block
>> index c57e5b7cb532..3c97303f658b 100644
>> --- a/Documentation/ABI/stable/sysfs-block
>> +++ b/Documentation/ABI/stable/sysfs-block
>> @@ -155,6 +155,39 @@ Description:
>> last zone of the device which may be smaller.
>>
>>
>> +What: /sys/block/<disk>/queue/copy_offload
>> +Date: June 2023
>> +Contact: linux-block@vger.kernel.org
>> +Description:
>> + [RW] When read, this file shows whether offloading copy to a
>> + device is enabled (1) or disabled (0). Writing '0' to this
>> + file will disable offloading copies for this device.
>> + Writing any '1' value will enable this feature. If the device
>> + does not support offloading, then writing 1, will result in an
>> + error.
>
>I am still not convinced that this one is really necessary. copy_max_bytes_hw !=
>0 indicates that the devices supports copy offload. And setting copy_max_bytes
>to 0 can be used to disable copy offload (which probably should be the default
>for now).
>
Agreed, we will do this in next iteration.
>> +
>> +
>> +What: /sys/block/<disk>/queue/copy_max_bytes
>> +Date: June 2023
>> +Contact: linux-block@vger.kernel.org
>> +Description:
>> + [RW] This is the maximum number of bytes that the block layer
>> + will allow for a copy request. This will is always smaller or
>
>will is -> is
>
acked
>> + equal to the maximum size allowed by the hardware, indicated by
>> + 'copy_max_bytes_hw'. An attempt to set a value higher than
>> + 'copy_max_bytes_hw' will truncate this to 'copy_max_bytes_hw'.
>> +
>> +
>> +What: /sys/block/<disk>/queue/copy_max_bytes_hw
>
>Nit: In keeping with the spirit of attributes like
>max_hw_sectors_kb/max_sectors_kb, I would call this one copy_max_hw_bytes.
>
acked, will update in next iteration.
>> +Date: June 2023
>> +Contact: linux-block@vger.kernel.org
>> +Description:
>> + [RO] This is the maximum number of bytes that the hardware
>> + will allow for single data copy request.
>> + A value of 0 means that the device does not support
>> + copy offload.
>> +
>> +
>> What: /sys/block/<disk>/queue/crypto/
>> Date: February 2022
>> Contact: linux-block@vger.kernel.org
>> diff --git a/block/blk-settings.c b/block/blk-settings.c
>> index 4dd59059b788..738cd3f21259 100644
>> --- a/block/blk-settings.c
>> +++ b/block/blk-settings.c
>> @@ -59,6 +59,8 @@ void blk_set_default_limits(struct queue_limits *lim)
>> lim->zoned = BLK_ZONED_NONE;
>> lim->zone_write_granularity = 0;
>> lim->dma_alignment = 511;
>> + lim->max_copy_sectors_hw = 0;
>> + lim->max_copy_sectors = 0;
>> }
>>
>> /**
>> @@ -82,6 +84,8 @@ void blk_set_stacking_limits(struct queue_limits *lim)
>> lim->max_dev_sectors = UINT_MAX;
>> lim->max_write_zeroes_sectors = UINT_MAX;
>> lim->max_zone_append_sectors = UINT_MAX;
>> + lim->max_copy_sectors_hw = UINT_MAX;
>> + lim->max_copy_sectors = UINT_MAX;
>> }
>> EXPORT_SYMBOL(blk_set_stacking_limits);
>>
>> @@ -183,6 +187,22 @@ void blk_queue_max_discard_sectors(struct request_queue *q,
>> }
>> EXPORT_SYMBOL(blk_queue_max_discard_sectors);
>>
>> +/**
>> + * blk_queue_max_copy_sectors_hw - set max sectors for a single copy payload
>> + * @q: the request queue for the device
>> + * @max_copy_sectors: maximum number of sectors to copy
>> + **/
>> +void blk_queue_max_copy_sectors_hw(struct request_queue *q,
>> + unsigned int max_copy_sectors)
>> +{
>> + if (max_copy_sectors > (COPY_MAX_BYTES >> SECTOR_SHIFT))
>> + max_copy_sectors = COPY_MAX_BYTES >> SECTOR_SHIFT;
>> +
>> + q->limits.max_copy_sectors_hw = max_copy_sectors;
>> + q->limits.max_copy_sectors = max_copy_sectors;
>> +}
>> +EXPORT_SYMBOL_GPL(blk_queue_max_copy_sectors_hw);
>> +
>> /**
>> * blk_queue_max_secure_erase_sectors - set max sectors for a secure erase
>> * @q: the request queue for the device
>> @@ -578,6 +598,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
>> t->max_segment_size = min_not_zero(t->max_segment_size,
>> b->max_segment_size);
>>
>> + t->max_copy_sectors = min(t->max_copy_sectors, b->max_copy_sectors);
>> + t->max_copy_sectors_hw = min(t->max_copy_sectors_hw,
>> + b->max_copy_sectors_hw);
>> +
>> t->misaligned |= b->misaligned;
>>
>> alignment = queue_limit_alignment_offset(b, start);
>> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
>> index afc797fb0dfc..43551778d035 100644
>> --- a/block/blk-sysfs.c
>> +++ b/block/blk-sysfs.c
>> @@ -199,6 +199,62 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag
>> return queue_var_show(0, page);
>> }
>>
>> +static ssize_t queue_copy_offload_show(struct request_queue *q, char *page)
>> +{
>> + return queue_var_show(blk_queue_copy(q), page);
>> +}
>> +
>> +static ssize_t queue_copy_offload_store(struct request_queue *q,
>> + const char *page, size_t count)
>> +{
>> + unsigned long copy_offload;
>> + ssize_t ret = queue_var_store(©_offload, page, count);
>> +
>> + if (ret < 0)
>> + return ret;
>> +
>> + if (copy_offload && !q->limits.max_copy_sectors_hw)
>> + return -EINVAL;
>> +
>> + if (copy_offload)
>> + blk_queue_flag_set(QUEUE_FLAG_COPY, q);
>> + else
>> + blk_queue_flag_clear(QUEUE_FLAG_COPY, q);
>> +
>> + return count;
>> +}
>
>See above. I think we can drop this attribute.
>
acked
Thank you,
Nitesh Shetty
Just a little heads up: I think we need to properly solve the
hw vs user limit as the various ad-hoc mechanisms tend to be
broken and don't scale. I plan to start looking into that the
next days.
On Wed, Jun 28, 2023 at 12:06:15AM +0530, Nitesh Shetty wrote:
> Add device limits as sysfs entries,
> - copy_offload (RW)
> - copy_max_bytes (RW)
> - copy_max_bytes_hw (RO)
>
> Above limits help to split the copy payload in block layer.
> copy_offload: used for setting copy offload(1) or emulation(0).
> copy_max_bytes: maximum total length of copy in single payload.
> copy_max_bytes_hw: Reflects the device supported maximum limit.
Why do we need the separate copy_offload boolean vs just looking
at the max_bytes value?
> diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
> index b7b56871029c..dff56813f95a 100644
> --- a/include/uapi/linux/fs.h
> +++ b/include/uapi/linux/fs.h
> @@ -64,6 +64,9 @@ struct fstrim_range {
> __u64 minlen;
> };
>
> +/* maximum copy offload length, this is set to 128MB based on current testing */
> +#define COPY_MAX_BYTES (1 << 27)
This should not be in the UAPI. It is a totally arbitrary internal
limit.
@@ -155,6 +155,39 @@ Description:
last zone of the device which may be smaller.
+What: /sys/block/<disk>/queue/copy_offload
+Date: June 2023
+Contact: linux-block@vger.kernel.org
+Description:
+ [RW] When read, this file shows whether offloading copy to a
+ device is enabled (1) or disabled (0). Writing '0' to this
+ file will disable offloading copies for this device.
+ Writing any '1' value will enable this feature. If the device
+ does not support offloading, then writing 1, will result in an
+ error.
+
+
+What: /sys/block/<disk>/queue/copy_max_bytes
+Date: June 2023
+Contact: linux-block@vger.kernel.org
+Description:
+ [RW] This is the maximum number of bytes that the block layer
+ will allow for a copy request. This will is always smaller or
+ equal to the maximum size allowed by the hardware, indicated by
+ 'copy_max_bytes_hw'. An attempt to set a value higher than
+ 'copy_max_bytes_hw' will truncate this to 'copy_max_bytes_hw'.
+
+
+What: /sys/block/<disk>/queue/copy_max_bytes_hw
+Date: June 2023
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] This is the maximum number of bytes that the hardware
+ will allow for single data copy request.
+ A value of 0 means that the device does not support
+ copy offload.
+
+
What: /sys/block/<disk>/queue/crypto/
Date: February 2022
Contact: linux-block@vger.kernel.org
@@ -59,6 +59,8 @@ void blk_set_default_limits(struct queue_limits *lim)
lim->zoned = BLK_ZONED_NONE;
lim->zone_write_granularity = 0;
lim->dma_alignment = 511;
+ lim->max_copy_sectors_hw = 0;
+ lim->max_copy_sectors = 0;
}
/**
@@ -82,6 +84,8 @@ void blk_set_stacking_limits(struct queue_limits *lim)
lim->max_dev_sectors = UINT_MAX;
lim->max_write_zeroes_sectors = UINT_MAX;
lim->max_zone_append_sectors = UINT_MAX;
+ lim->max_copy_sectors_hw = UINT_MAX;
+ lim->max_copy_sectors = UINT_MAX;
}
EXPORT_SYMBOL(blk_set_stacking_limits);
@@ -183,6 +187,22 @@ void blk_queue_max_discard_sectors(struct request_queue *q,
}
EXPORT_SYMBOL(blk_queue_max_discard_sectors);
+/**
+ * blk_queue_max_copy_sectors_hw - set max sectors for a single copy payload
+ * @q: the request queue for the device
+ * @max_copy_sectors: maximum number of sectors to copy
+ **/
+void blk_queue_max_copy_sectors_hw(struct request_queue *q,
+ unsigned int max_copy_sectors)
+{
+ if (max_copy_sectors > (COPY_MAX_BYTES >> SECTOR_SHIFT))
+ max_copy_sectors = COPY_MAX_BYTES >> SECTOR_SHIFT;
+
+ q->limits.max_copy_sectors_hw = max_copy_sectors;
+ q->limits.max_copy_sectors = max_copy_sectors;
+}
+EXPORT_SYMBOL_GPL(blk_queue_max_copy_sectors_hw);
+
/**
* blk_queue_max_secure_erase_sectors - set max sectors for a secure erase
* @q: the request queue for the device
@@ -578,6 +598,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->max_segment_size = min_not_zero(t->max_segment_size,
b->max_segment_size);
+ t->max_copy_sectors = min(t->max_copy_sectors, b->max_copy_sectors);
+ t->max_copy_sectors_hw = min(t->max_copy_sectors_hw,
+ b->max_copy_sectors_hw);
+
t->misaligned |= b->misaligned;
alignment = queue_limit_alignment_offset(b, start);
@@ -199,6 +199,62 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag
return queue_var_show(0, page);
}
+static ssize_t queue_copy_offload_show(struct request_queue *q, char *page)
+{
+ return queue_var_show(blk_queue_copy(q), page);
+}
+
+static ssize_t queue_copy_offload_store(struct request_queue *q,
+ const char *page, size_t count)
+{
+ unsigned long copy_offload;
+ ssize_t ret = queue_var_store(©_offload, page, count);
+
+ if (ret < 0)
+ return ret;
+
+ if (copy_offload && !q->limits.max_copy_sectors_hw)
+ return -EINVAL;
+
+ if (copy_offload)
+ blk_queue_flag_set(QUEUE_FLAG_COPY, q);
+ else
+ blk_queue_flag_clear(QUEUE_FLAG_COPY, q);
+
+ return count;
+}
+
+static ssize_t queue_copy_max_hw_show(struct request_queue *q, char *page)
+{
+ return sprintf(page, "%llu\n", (unsigned long long)
+ q->limits.max_copy_sectors_hw << SECTOR_SHIFT);
+}
+
+static ssize_t queue_copy_max_show(struct request_queue *q, char *page)
+{
+ return sprintf(page, "%llu\n", (unsigned long long)
+ q->limits.max_copy_sectors << SECTOR_SHIFT);
+}
+
+static ssize_t queue_copy_max_store(struct request_queue *q,
+ const char *page, size_t count)
+{
+ unsigned long max_copy;
+ ssize_t ret = queue_var_store(&max_copy, page, count);
+
+ if (ret < 0)
+ return ret;
+
+ if (max_copy & (queue_logical_block_size(q) - 1))
+ return -EINVAL;
+
+ max_copy >>= SECTOR_SHIFT;
+ q->limits.max_copy_sectors = min_t(unsigned int, max_copy,
+ q->limits.max_copy_sectors_hw);
+
+ return count;
+}
+
static ssize_t queue_write_same_max_show(struct request_queue *q, char *page)
{
return queue_var_show(0, page);
@@ -522,6 +578,10 @@ QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones");
QUEUE_RO_ENTRY(queue_max_open_zones, "max_open_zones");
QUEUE_RO_ENTRY(queue_max_active_zones, "max_active_zones");
+QUEUE_RW_ENTRY(queue_copy_offload, "copy_offload");
+QUEUE_RO_ENTRY(queue_copy_max_hw, "copy_max_bytes_hw");
+QUEUE_RW_ENTRY(queue_copy_max, "copy_max_bytes");
+
QUEUE_RW_ENTRY(queue_nomerges, "nomerges");
QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity");
QUEUE_RW_ENTRY(queue_poll, "io_poll");
@@ -638,6 +698,9 @@ static struct attribute *queue_attrs[] = {
&queue_discard_max_entry.attr,
&queue_discard_max_hw_entry.attr,
&queue_discard_zeroes_data_entry.attr,
+ &queue_copy_offload_entry.attr,
+ &queue_copy_max_hw_entry.attr,
+ &queue_copy_max_entry.attr,
&queue_write_same_max_entry.attr,
&queue_write_zeroes_max_entry.attr,
&queue_zone_append_max_entry.attr,
@@ -309,6 +309,9 @@ struct queue_limits {
unsigned int discard_alignment;
unsigned int zone_write_granularity;
+ unsigned int max_copy_sectors_hw;
+ unsigned int max_copy_sectors;
+
unsigned short max_segments;
unsigned short max_integrity_segments;
unsigned short max_discard_segments;
@@ -554,6 +557,7 @@ struct request_queue {
#define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */
#define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */
#define QUEUE_FLAG_SKIP_TAGSET_QUIESCE 31 /* quiesce_tagset skip the queue*/
+#define QUEUE_FLAG_COPY 32 /* enable/disable device copy offload */
#define QUEUE_FLAG_MQ_DEFAULT ((1UL << QUEUE_FLAG_IO_STAT) | \
(1UL << QUEUE_FLAG_SAME_COMP) | \
@@ -574,6 +578,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
test_bit(QUEUE_FLAG_STABLE_WRITES, &(q)->queue_flags)
#define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
#define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
+#define blk_queue_copy(q) test_bit(QUEUE_FLAG_COPY, &(q)->queue_flags)
#define blk_queue_zone_resetall(q) \
test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags)
#define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
@@ -891,6 +896,8 @@ extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int);
extern void blk_queue_max_segments(struct request_queue *, unsigned short);
extern void blk_queue_max_discard_segments(struct request_queue *,
unsigned short);
+extern void blk_queue_max_copy_sectors_hw(struct request_queue *q,
+ unsigned int max_copy_sectors);
void blk_queue_max_secure_erase_sectors(struct request_queue *q,
unsigned int max_sectors);
extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
@@ -1210,6 +1217,11 @@ static inline unsigned int bdev_discard_granularity(struct block_device *bdev)
return bdev_get_queue(bdev)->limits.discard_granularity;
}
+static inline unsigned int bdev_max_copy_sectors(struct block_device *bdev)
+{
+ return bdev_get_queue(bdev)->limits.max_copy_sectors;
+}
+
static inline unsigned int
bdev_max_secure_erase_sectors(struct block_device *bdev)
{
@@ -64,6 +64,9 @@ struct fstrim_range {
__u64 minlen;
};
+/* maximum copy offload length, this is set to 128MB based on current testing */
+#define COPY_MAX_BYTES (1 << 27)
+
/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
#define FILE_DEDUPE_RANGE_SAME 0
#define FILE_DEDUPE_RANGE_DIFFERS 1