@@ -22,6 +22,11 @@ enum io_uring_cmd_flags {
IO_URING_F_IOPOLL = (1 << 10),
};
+union io_uring_fused_cmd_data {
+ /* secondary request; private to the fused cmd, drivers must not touch it */
+ struct io_kiocb *__secondary;
+};
+
struct io_uring_cmd {
struct file *file;
const void *cmd;
@@ -33,7 +38,15 @@ struct io_uring_cmd {
};
u32 cmd_op;
u32 flags;
- u8 pdu[32]; /* available inline for free use */
+
+ /* for a fused command, less pdu space is available inline */
+ union {
+ struct {
+ union io_uring_fused_cmd_data data;
+ u8 pdu[24]; /* available inline for free use */
+ } fused;
+ u8 pdu[32]; /* available inline for free use */
+ };
};
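+
+/*
+ * Illustrative sketch, not part of this patch: a uring_cmd provider that
+ * wants to run under a fused command should size its per-command pdu
+ * against the smaller fused area ("struct my_cmd_pdu" is hypothetical):
+ *
+ *     BUILD_BUG_ON(sizeof(struct my_cmd_pdu) >
+ *                  sizeof_field(struct io_uring_cmd, fused.pdu));
+ */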
#if defined(CONFIG_IO_URING)
@@ -407,6 +407,7 @@ enum {
/* keep async read/write and isreg together and in order */
REQ_F_SUPPORT_NOWAIT_BIT,
REQ_F_ISREG_BIT,
+ REQ_F_FUSED_SECONDARY_BIT,
/* not a real bit, just to check we're not overflowing the space */
__REQ_F_LAST_BIT,
@@ -476,6 +477,8 @@ enum {
REQ_F_CLEAR_POLLIN = BIT_ULL(REQ_F_CLEAR_POLLIN_BIT),
/* hashed into ->cancel_hash_locked, protected by ->uring_lock */
REQ_F_HASH_LOCKED = BIT_ULL(REQ_F_HASH_LOCKED_BIT),
+ /* secondary request of a fused cmd; it is not itself a uring_cmd */
+ REQ_F_FUSED_SECONDARY = BIT_ULL(REQ_F_FUSED_SECONDARY_BIT),
};
typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts);
@@ -558,6 +561,9 @@ struct io_kiocb {
* REQ_F_BUFFER_RING is set.
*/
struct io_buffer_list *buf_list;
+
+ /* the fused command's primary request, stored in the secondary */
+ struct io_kiocb *fused_primary_req;
};
union {
@@ -73,6 +73,8 @@ struct io_uring_sqe {
__u16 buf_index;
/* for grouped buffer selection */
__u16 buf_group;
+ /* number of secondary (normal) SQEs that follow this fused SQE */
+ __u16 nr_secondary;
} __attribute__((packed));
/* personality to use, if used */
__u16 personality;
@@ -173,6 +175,16 @@ enum {
*/
#define IORING_SETUP_DEFER_TASKRUN (1U << 13)
+/*
+ * Multiple requests are submitted as one logical request: the first is the
+ * primary request and the rest are secondary requests, whose count is read
+ * from the primary SQE (nr_secondary).
+ *
+ * The primary request is responsible for submitting the secondary requests,
+ * and it is not completed until all secondary requests are done.
+ */
+#define IORING_SETUP_FUSED_REQ (1U << 14)
+
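+/*
+ * Illustrative submission sketch, not part of this patch: a fused request is
+ * one primary IORING_OP_FUSED_CMD SQE immediately followed in the SQ ring by
+ * nr_secondary ordinary SQEs.  This assumes a ring created with
+ * IORING_SETUP_FUSED_REQ, a secondary opcode that is allowed as a fused
+ * secondary, and driver-specific placeholders (dev_fd, MY_DRV_CMD):
+ *
+ *     struct io_uring_sqe *p = io_uring_get_sqe(&ring);
+ *     struct io_uring_sqe *s = io_uring_get_sqe(&ring);
+ *
+ *     memset(p, 0, sizeof(*p));
+ *     p->opcode = IORING_OP_FUSED_CMD;
+ *     p->fd = dev_fd;
+ *     p->cmd_op = MY_DRV_CMD;
+ *     p->nr_secondary = 1;
+ *
+ *     io_uring_prep_read(s, data_fd, buf, len, 0);    // the secondary request
+ *     io_uring_submit(&ring);
+ */
+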
enum io_uring_op {
IORING_OP_NOP,
IORING_OP_READV,
@@ -223,6 +235,7 @@ enum io_uring_op {
IORING_OP_URING_CMD,
IORING_OP_SEND_ZC,
IORING_OP_SENDMSG_ZC,
+ IORING_OP_FUSED_CMD,
/* this goes last, obviously */
IORING_OP_LAST,
@@ -476,6 +489,7 @@ struct io_uring_params {
#define IORING_FEAT_CQE_SKIP (1U << 11)
#define IORING_FEAT_LINKED_FILE (1U << 12)
#define IORING_FEAT_REG_REG_RING (1U << 13)
+#define IORING_FEAT_FUSED_REQ (1U << 14)
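+
+/*
+ * Illustrative check, not part of this patch: with liburing, userspace can
+ * probe for fused request support before relying on it, e.g.:
+ *
+ *     struct io_uring_params p = { .flags = IORING_SETUP_FUSED_REQ };
+ *     struct io_uring ring;
+ *
+ *     if (io_uring_queue_init_params(8, &ring, &p) < 0 ||
+ *         !(p.features & IORING_FEAT_FUSED_REQ))
+ *             ... fall back to non-fused submission ...
+ */
+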
/*
* io_uring_register(2) opcodes and arguments
@@ -7,5 +7,5 @@ obj-$(CONFIG_IO_URING) += io_uring.o xattr.o nop.o fs.o splice.o \
openclose.o uring_cmd.o epoll.o \
statx.o net.o msg_ring.o timeout.o \
sqpoll.o fdinfo.o tctx.o poll.o \
- cancel.o kbuf.o rsrc.o rw.o opdef.o notif.o
+ cancel.o kbuf.o rsrc.o rw.o opdef.o notif.o fused_cmd.o
obj-$(CONFIG_IO_WQ) += io-wq.o
new file mode 100644
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/io_uring.h>
+
+#include <uapi/linux/io_uring.h>
+
+#include "io_uring.h"
+#include "opdef.h"
+#include "uring_cmd.h"
+#include "fused_cmd.h"
+
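+/*
+ * Request flow implemented below, summarized for reference:
+ *
+ * io_fused_cmd_prep():  fetch the secondary SQE, allocate and initialize the
+ *                       secondary request, and link it into the primary cmd.
+ * io_fused_cmd():       issue the primary uring_cmd; if it does not defer its
+ *                       completion, the secondary request is dropped.
+ * io_fused_cmd_complete_secondary(): on secondary completion, unlink both
+ *                       sides and notify the primary via its saved task_work
+ *                       callback.
+ */
+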
+static bool io_fused_secondary_valid(u8 op)
+{
+ if (op == IORING_OP_FUSED_CMD)
+ return false;
+
+ if (!io_issue_defs[op].fused_secondary)
+ return false;
+
+ return true;
+}
+
+int io_fused_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+ __must_hold(&req->ctx->uring_lock)
+{
+ struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
+ const struct io_uring_sqe *secondary_sqe = NULL;
+ struct io_ring_ctx *ctx = req->ctx;
+ struct io_kiocb *secondary;
+ u8 secondary_op;
+ int ret;
+
+ if (!(ctx->flags & IORING_SETUP_FUSED_REQ))
+ return -EINVAL;
+
+ if (unlikely(sqe->__pad1))
+ return -EINVAL;
+
+ /*
+ * Only a single secondary request is supported for now. Multiple
+ * secondary requests may be supported later; that case can currently
+ * be covered by issuing multiple fused commands.
+ */
+ if (unlikely(sqe->nr_secondary != 1))
+ return -EINVAL;
+
+ ioucmd->flags = READ_ONCE(sqe->uring_cmd_flags);
+ if (unlikely(ioucmd->flags))
+ return -EINVAL;
+
+ if (unlikely(!io_get_secondary_sqe(ctx, &secondary_sqe)))
+ return -EINVAL;
+
+ if (unlikely(!secondary_sqe))
+ return -EINVAL;
+
+ secondary_op = READ_ONCE(secondary_sqe->opcode);
+ if (unlikely(!io_fused_secondary_valid(secondary_op)))
+ return -EINVAL;
+
+ ioucmd->cmd = sqe->cmd;
+ ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
+
+ ret = -ENOMEM;
+ if (unlikely(!io_alloc_req(ctx, &secondary)))
+ goto fail;
+
+ ret = io_init_secondary_req(ctx, secondary, secondary_sqe,
+ REQ_F_FUSED_SECONDARY);
+ if (unlikely(ret))
+ goto fail_free_req;
+
+ ioucmd->fused.data.__secondary = secondary;
+
+ return 0;
+
+fail_free_req:
+ io_free_req(secondary);
+fail:
+ return ret;
+}
+
+int io_fused_cmd(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
+ int ret;
+
+ ret = io_uring_cmd(req, issue_flags);
+ /*
+ * If the primary command did not defer its completion (it failed or
+ * completed inline), the secondary request won't be used; drop it.
+ */
+ if (ret != IOU_ISSUE_SKIP_COMPLETE)
+ io_free_req(ioucmd->fused.data.__secondary);
+
+ return ret;
+}
+
+/*
+ * Called after the secondary request is completed.
+ *
+ * Notify the primary request, via its saved task_work callback, that the
+ * secondary is done.
+ */
+void io_fused_cmd_complete_secondary(struct io_kiocb *secondary)
+{
+ struct io_kiocb *req = secondary->fused_primary_req;
+ struct io_uring_cmd *ioucmd;
+
+ if (unlikely(!req || !(secondary->flags & REQ_F_FUSED_SECONDARY)))
+ return;
+
+ /* notify primary command that we are done */
+ secondary->fused_primary_req = NULL;
+ ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
+ ioucmd->fused.data.__secondary = NULL;
+
+ io_uring_cmd_complete_in_task(ioucmd, ioucmd->task_work_cb);
+}
new file mode 100644
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef IOU_FUSED_CMD_H
+#define IOU_FUSED_CMD_H
+
+int io_fused_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_fused_cmd(struct io_kiocb *req, unsigned int issue_flags);
+void io_fused_cmd_complete_secondary(struct io_kiocb *secondary);
+
+#endif
@@ -92,6 +92,7 @@
#include "cancel.h"
#include "net.h"
#include "notif.h"
+#include "fused_cmd.h"
#include "timeout.h"
#include "poll.h"
@@ -111,7 +112,7 @@
#define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \
REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS | \
- REQ_F_ASYNC_DATA)
+ REQ_F_ASYNC_DATA | REQ_F_FUSED_SECONDARY)
#define IO_REQ_CLEAN_SLOW_FLAGS (REQ_F_REFCOUNT | REQ_F_LINK | REQ_F_HARDLINK |\
IO_REQ_CLEAN_FLAGS)
@@ -971,6 +972,9 @@ static void __io_req_complete_post(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
+ if (req->flags & REQ_F_FUSED_SECONDARY)
+ io_fused_cmd_complete_secondary(req);
+
io_cq_lock(ctx);
if (!(req->flags & REQ_F_CQE_SKIP))
io_fill_cqe_req(ctx, req);
@@ -1855,6 +1859,8 @@ static void io_clean_op(struct io_kiocb *req)
spin_lock(&req->ctx->completion_lock);
io_put_kbuf_comp(req);
spin_unlock(&req->ctx->completion_lock);
+ } else if (req->flags & REQ_F_FUSED_SECONDARY) {
+ io_fused_cmd_complete_secondary(req);
}
if (req->flags & REQ_F_NEED_CLEANUP) {
@@ -2163,8 +2169,8 @@ static void io_init_req_drain(struct io_kiocb *req)
}
}
-static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
- const struct io_uring_sqe *sqe)
+static inline int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
+ const struct io_uring_sqe *sqe, u64 secondary_flags)
__must_hold(&ctx->uring_lock)
{
const struct io_issue_def *def;
@@ -2217,6 +2223,12 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
}
}
+ if (secondary_flags) {
+ if (!def->fused_secondary)
+ return -EINVAL;
+ req->flags |= secondary_flags;
+ }
+
if (!def->ioprio && sqe->ioprio)
return -EINVAL;
if (!def->iopoll && (ctx->flags & IORING_SETUP_IOPOLL))
@@ -2257,6 +2269,12 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
return def->prep(req, sqe);
}
+int io_init_secondary_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
+ const struct io_uring_sqe *sqe, u64 flags)
+{
+ return io_init_req(ctx, req, sqe, flags);
+}
+
static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
struct io_kiocb *req, int ret)
{
@@ -2301,7 +2319,7 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
struct io_submit_link *link = &ctx->submit_state.link;
int ret;
- ret = io_init_req(ctx, req, sqe);
+ ret = io_init_req(ctx, req, sqe, 0);
if (unlikely(ret))
return io_submit_fail_init(sqe, req, ret);
@@ -2396,7 +2414,8 @@ static void io_commit_sqring(struct io_ring_ctx *ctx)
* used, it's important that those reads are done through READ_ONCE() to
* prevent a re-load down the line.
*/
-static bool io_get_sqe(struct io_ring_ctx *ctx, const struct io_uring_sqe **sqe)
+static inline bool io_get_sqe(struct io_ring_ctx *ctx,
+ const struct io_uring_sqe **sqe)
{
unsigned head, mask = ctx->sq_entries - 1;
unsigned sq_idx = ctx->cached_sq_head++ & mask;
@@ -2425,6 +2444,12 @@ static bool io_get_sqe(struct io_ring_ctx *ctx, const struct io_uring_sqe **sqe)
return false;
}
+bool io_get_secondary_sqe(struct io_ring_ctx *ctx,
+ const struct io_uring_sqe **sqe)
+{
+ return io_get_sqe(ctx, sqe);
+}
+
int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
__must_hold(&ctx->uring_lock)
{
@@ -3855,7 +3880,8 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED |
IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS |
IORING_FEAT_RSRC_TAGS | IORING_FEAT_CQE_SKIP |
- IORING_FEAT_LINKED_FILE | IORING_FEAT_REG_REG_RING;
+ IORING_FEAT_LINKED_FILE | IORING_FEAT_REG_REG_RING |
+ IORING_FEAT_FUSED_REQ;
if (copy_to_user(params, p, sizeof(*p))) {
ret = -EFAULT;
@@ -3913,7 +3939,8 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL |
IORING_SETUP_COOP_TASKRUN | IORING_SETUP_TASKRUN_FLAG |
IORING_SETUP_SQE128 | IORING_SETUP_CQE32 |
- IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN))
+ IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN |
+ IORING_SETUP_FUSED_REQ))
return -EINVAL;
return io_uring_create(entries, &p, params);
@@ -78,6 +78,11 @@ bool __io_alloc_req_refill(struct io_ring_ctx *ctx);
bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
bool cancel_all);
+bool io_get_secondary_sqe(struct io_ring_ctx *ctx,
+ const struct io_uring_sqe **sqe);
+int io_init_secondary_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
+ const struct io_uring_sqe *sqe, u64 flags);
+
#define io_lockdep_assert_cq_locked(ctx) \
do { \
if (ctx->flags & IORING_SETUP_IOPOLL) { \
@@ -33,6 +33,7 @@
#include "poll.h"
#include "cancel.h"
#include "rw.h"
+#include "fused_cmd.h"
static int io_no_issue(struct io_kiocb *req, unsigned int issue_flags)
{
@@ -428,6 +429,12 @@ const struct io_issue_def io_issue_defs[] = {
.prep = io_eopnotsupp_prep,
#endif
},
+ [IORING_OP_FUSED_CMD] = {
+ .needs_file = 1,
+ .plug = 1,
+ .prep = io_fused_cmd_prep,
+ .issue = io_fused_cmd,
+ },
};
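+
+/*
+ * Illustrative only, no opcode is marked in this patch: an opcode that may be
+ * used as the secondary of a fused command would set .fused_secondary in its
+ * io_issue_defs[] entry, e.g.:
+ *
+ *     [IORING_OP_READ] = {
+ *             ...
+ *             .fused_secondary        = 1,
+ *     },
+ */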
@@ -648,6 +655,11 @@ const struct io_cold_def io_cold_defs[] = {
.fail = io_sendrecv_fail,
#endif
},
+ [IORING_OP_FUSED_CMD] = {
+ .name = "FUSED_CMD",
+ .async_size = uring_cmd_pdu_size(1),
+ .prep_async = io_uring_cmd_prep_async,
+ },
};
const char *io_uring_get_opcode(u8 opcode)
@@ -29,6 +29,8 @@ struct io_issue_def {
unsigned iopoll_queue : 1;
/* opcode specific path will handle ->async_data allocation if needed */
unsigned manual_alloc : 1;
+ /* can be the secondary op of a fused command */
+ unsigned fused_secondary : 1;
int (*issue)(struct io_kiocb *, unsigned int);
int (*prep)(struct io_kiocb *, const struct io_uring_sqe *);