[v14,02/11] Add infrastructure for copy offload in block and request layer.
Commit Message
We add two new opcode REQ_OP_COPY_SRC, REQ_OP_COPY_DST.
Since copy is a composite operation involving src and dst sectors/lba,
each needs to be represented by a separate bio to make it compatible
with device mapper.
We expect caller to take a plug and send bio with source information,
followed by bio with destination information.
Once the src bio arrives we form a request and wait for destination
bio. Upon arrival of destination we merge these two bio's and send
corresponding request down to device driver.
Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
---
block/blk-core.c | 7 +++++++
block/blk-merge.c | 41 +++++++++++++++++++++++++++++++++++++++
block/blk.h | 16 +++++++++++++++
block/elevator.h | 1 +
include/linux/bio.h | 6 +-----
include/linux/blk_types.h | 10 ++++++++++
6 files changed, 76 insertions(+), 5 deletions(-)
Comments
On 8/11/23 03:52, Nitesh Shetty wrote:
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index 0bad62cca3d0..de0ad7a0d571 100644
> +static inline bool op_is_copy(blk_opf_t op)
> +{
> + return ((op & REQ_OP_MASK) == REQ_OP_COPY_SRC ||
> + (op & REQ_OP_MASK) == REQ_OP_COPY_DST);
> +}
> +
The above function should be moved into include/linux/blk-mq.h below the
definition of req_op() such that it can use req_op() instead of
open-coding it.
Thanks,
Bart.
On 8/11/23 03:52, Nitesh Shetty wrote:
> We expect caller to take a plug and send bio with source information,
> followed by bio with destination information.
> Once the src bio arrives we form a request and wait for destination
> bio. Upon arrival of destination we merge these two bio's and send
> corresponding request down to device driver.
Is the above description up-to-date? In the cover letter there is a
different description of how copy offloading works.
Thanks,
Bart.
On 23/08/11 02:58PM, Bart Van Assche wrote:
>On 8/11/23 03:52, Nitesh Shetty wrote:
>>We expect caller to take a plug and send bio with source information,
>>followed by bio with destination information.
>>Once the src bio arrives we form a request and wait for destination
>>bio. Upon arrival of destination we merge these two bio's and send
>>corresponding request down to device driver.
>
>Is the above description up-to-date? In the cover letter there is a
>different description of how copy offloading works.
>
Acked, This description is up to date.
We need to update this description in cover letter.
Thank you,
Nitesh Shetty
On 23/08/11 02:25PM, Bart Van Assche wrote:
>On 8/11/23 03:52, Nitesh Shetty wrote:
>>diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
>>index 0bad62cca3d0..de0ad7a0d571 100644
>>+static inline bool op_is_copy(blk_opf_t op)
>>+{
>>+ return ((op & REQ_OP_MASK) == REQ_OP_COPY_SRC ||
>>+ (op & REQ_OP_MASK) == REQ_OP_COPY_DST);
>>+}
>>+
>
>The above function should be moved into include/linux/blk-mq.h below the
>definition of req_op() such that it can use req_op() instead of
>open-coding it.
>
We use this later for dm patches(patch 9) as well, and we don't have request at
that time.
Thank you,
Nitesh Shetty
On 8/14/23 05:18, Nitesh Shetty wrote:
> On 23/08/11 02:25PM, Bart Van Assche wrote:
>> On 8/11/23 03:52, Nitesh Shetty wrote:
>>> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
>>> index 0bad62cca3d0..de0ad7a0d571 100644
>>> +static inline bool op_is_copy(blk_opf_t op)
>>> +{
>>> + return ((op & REQ_OP_MASK) == REQ_OP_COPY_SRC ||
>>> + (op & REQ_OP_MASK) == REQ_OP_COPY_DST);
>>> +}
>>> +
>>
>> The above function should be moved into include/linux/blk-mq.h below the
>> definition of req_op() such that it can use req_op() instead of
>> open-coding it.
>>
> We use this later for dm patches(patch 9) as well, and we don't have
> request at
> that time.
My understanding is that include/linux/blk_types.h should only contain
data types and constants and hence that inline functions like
op_is_copy() should be moved elsewhere.
Bart.
@@ -121,6 +121,8 @@ static const char *const blk_op_name[] = {
REQ_OP_NAME(ZONE_FINISH),
REQ_OP_NAME(ZONE_APPEND),
REQ_OP_NAME(WRITE_ZEROES),
+ REQ_OP_NAME(COPY_SRC),
+ REQ_OP_NAME(COPY_DST),
REQ_OP_NAME(DRV_IN),
REQ_OP_NAME(DRV_OUT),
};
@@ -796,6 +798,11 @@ void submit_bio_noacct(struct bio *bio)
if (!q->limits.max_write_zeroes_sectors)
goto not_supported;
break;
+ case REQ_OP_COPY_SRC:
+ case REQ_OP_COPY_DST:
+ if (!q->limits.max_copy_sectors)
+ goto not_supported;
+ break;
default:
break;
}
@@ -158,6 +158,20 @@ static struct bio *bio_split_write_zeroes(struct bio *bio,
return bio_split(bio, lim->max_write_zeroes_sectors, GFP_NOIO, bs);
}
+static struct bio *bio_split_copy(struct bio *bio,
+ const struct queue_limits *lim,
+ unsigned int *nsegs)
+{
+ *nsegs = 1;
+ if (bio_sectors(bio) <= lim->max_copy_sectors)
+ return NULL;
+ /*
+ * We don't support splitting for a copy bio. End it with EIO if
+ * splitting is required and return an error pointer.
+ */
+ return ERR_PTR(-EIO);
+}
+
/*
* Return the maximum number of sectors from the start of a bio that may be
* submitted as a single request to a block device. If enough sectors remain,
@@ -366,6 +380,12 @@ struct bio *__bio_split_to_limits(struct bio *bio,
case REQ_OP_WRITE_ZEROES:
split = bio_split_write_zeroes(bio, lim, nr_segs, bs);
break;
+ case REQ_OP_COPY_SRC:
+ case REQ_OP_COPY_DST:
+ split = bio_split_copy(bio, lim, nr_segs);
+ if (IS_ERR(split))
+ return NULL;
+ break;
default:
split = bio_split_rw(bio, lim, nr_segs, bs,
get_max_io_size(bio, lim) << SECTOR_SHIFT);
@@ -922,6 +942,9 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
if (!rq_mergeable(rq) || !bio_mergeable(bio))
return false;
+ if (blk_copy_offload_mergable(rq, bio))
+ return true;
+
if (req_op(rq) != bio_op(bio))
return false;
@@ -951,6 +974,8 @@ enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
{
if (blk_discard_mergable(rq))
return ELEVATOR_DISCARD_MERGE;
+ else if (blk_copy_offload_mergable(rq, bio))
+ return ELEVATOR_COPY_OFFLOAD_MERGE;
else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
return ELEVATOR_BACK_MERGE;
else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
@@ -1053,6 +1078,20 @@ static enum bio_merge_status bio_attempt_discard_merge(struct request_queue *q,
return BIO_MERGE_FAILED;
}
+static enum bio_merge_status bio_attempt_copy_offload_merge(struct request *req,
+ struct bio *bio)
+{
+ if (req->__data_len != bio->bi_iter.bi_size)
+ return BIO_MERGE_FAILED;
+
+ req->biotail->bi_next = bio;
+ req->biotail = bio;
+ req->nr_phys_segments++;
+ req->__data_len += bio->bi_iter.bi_size;
+
+ return BIO_MERGE_OK;
+}
+
static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
struct request *rq,
struct bio *bio,
@@ -1073,6 +1112,8 @@ static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
break;
case ELEVATOR_DISCARD_MERGE:
return bio_attempt_discard_merge(q, rq, bio);
+ case ELEVATOR_COPY_OFFLOAD_MERGE:
+ return bio_attempt_copy_offload_merge(rq, bio);
default:
return BIO_MERGE_NONE;
}
@@ -155,6 +155,20 @@ static inline bool blk_discard_mergable(struct request *req)
return false;
}
+/*
+ * Copy offload sends a pair of bio with REQ_OP_COPY_SRC and REQ_OP_COPY_DST
+ * operation by taking a plug.
+ * Initially SRC bio is sent which forms a request and
+ * waits for DST bio to arrive. Once DST bio arrives
+ * we merge it and send request down to driver.
+ */
+static inline bool blk_copy_offload_mergable(struct request *req,
+ struct bio *bio)
+{
+ return (req_op(req) == REQ_OP_COPY_SRC &&
+ bio_op(bio) == REQ_OP_COPY_DST);
+}
+
static inline unsigned int blk_rq_get_max_segments(struct request *rq)
{
if (req_op(rq) == REQ_OP_DISCARD)
@@ -297,6 +311,8 @@ static inline bool bio_may_exceed_limits(struct bio *bio,
case REQ_OP_DISCARD:
case REQ_OP_SECURE_ERASE:
case REQ_OP_WRITE_ZEROES:
+ case REQ_OP_COPY_SRC:
+ case REQ_OP_COPY_DST:
return true; /* non-trivial splitting decisions */
default:
break;
@@ -18,6 +18,7 @@ enum elv_merge {
ELEVATOR_FRONT_MERGE = 1,
ELEVATOR_BACK_MERGE = 2,
ELEVATOR_DISCARD_MERGE = 3,
+ ELEVATOR_COPY_OFFLOAD_MERGE = 4,
};
struct blk_mq_alloc_data;
@@ -53,11 +53,7 @@ static inline unsigned int bio_max_segs(unsigned int nr_segs)
*/
static inline bool bio_has_data(struct bio *bio)
{
- if (bio &&
- bio->bi_iter.bi_size &&
- bio_op(bio) != REQ_OP_DISCARD &&
- bio_op(bio) != REQ_OP_SECURE_ERASE &&
- bio_op(bio) != REQ_OP_WRITE_ZEROES)
+ if (bio && (bio_op(bio) == REQ_OP_READ || bio_op(bio) == REQ_OP_WRITE))
return true;
return false;
@@ -394,6 +394,10 @@ enum req_op {
/* reset all the zone present on the device */
REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)17,
+ /* copy offload dst and src operation */
+ REQ_OP_COPY_SRC = (__force blk_opf_t)19,
+ REQ_OP_COPY_DST = (__force blk_opf_t)21,
+
/* Driver private requests */
REQ_OP_DRV_IN = (__force blk_opf_t)34,
REQ_OP_DRV_OUT = (__force blk_opf_t)35,
@@ -482,6 +486,12 @@ static inline bool op_is_write(blk_opf_t op)
return !!(op & (__force blk_opf_t)1);
}
+static inline bool op_is_copy(blk_opf_t op)
+{
+ return ((op & REQ_OP_MASK) == REQ_OP_COPY_SRC ||
+ (op & REQ_OP_MASK) == REQ_OP_COPY_DST);
+}
+
/*
* Check if the bio or request is one that needs special treatment in the
* flush state machine.