[09/11] io_uring/rsrc: don't offload node free

Message ID 8083bdb49f57a968104137f1f256af8a5d46da64.1680187408.git.asml.silence@gmail.com
State New
Headers
Series optimise registered buffer/file updates |

Commit Message

Pavel Begunkov March 30, 2023, 2:53 p.m. UTC
struct delayed_work rsrc_put_work was used to offload node freeing
because io_rsrc_node_ref_zero() used to be called by RCU in the IRQ
context. Now that percpu refcounting is gone, we can free nodes
eagerly on the spot without pushing the work to a worker.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 include/linux/io_uring_types.h |  3 --
 io_uring/io_uring.c            |  6 ----
 io_uring/rsrc.c                | 59 +++-------------------------------
 3 files changed, 4 insertions(+), 64 deletions(-)
  

Patch

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 9492889f00c0..47496059e13a 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -330,9 +330,6 @@  struct io_ring_ctx {
 	struct io_rsrc_data		*file_data;
 	struct io_rsrc_data		*buf_data;
 
-	struct delayed_work		rsrc_put_work;
-	struct callback_head		rsrc_put_tw;
-	struct llist_head		rsrc_put_llist;
 	/* protected by ->uring_lock */
 	struct list_head		rsrc_ref_list;
 
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index e94780c0a024..8c3886a4ca96 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -326,9 +326,6 @@  static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	INIT_LIST_HEAD(&ctx->timeout_list);
 	INIT_LIST_HEAD(&ctx->ltimeout_list);
 	INIT_LIST_HEAD(&ctx->rsrc_ref_list);
-	INIT_DELAYED_WORK(&ctx->rsrc_put_work, io_rsrc_put_work);
-	init_task_work(&ctx->rsrc_put_tw, io_rsrc_put_tw);
-	init_llist_head(&ctx->rsrc_put_llist);
 	init_llist_head(&ctx->work_llist);
 	INIT_LIST_HEAD(&ctx->tctx_list);
 	ctx->submit_state.free_list.next = NULL;
@@ -2822,11 +2819,8 @@  static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 		io_rsrc_node_destroy(ctx->rsrc_node);
 	if (ctx->rsrc_backup_node)
 		io_rsrc_node_destroy(ctx->rsrc_backup_node);
-	flush_delayed_work(&ctx->rsrc_put_work);
-	flush_delayed_work(&ctx->fallback_work);
 
 	WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list));
-	WARN_ON_ONCE(!llist_empty(&ctx->rsrc_put_llist));
 
 #if defined(CONFIG_UNIX)
 	if (ctx->ring_sock) {
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 95e71300bb35..0f4e245dee1b 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -145,15 +145,8 @@  static void io_rsrc_put_work_one(struct io_rsrc_data *rsrc_data,
 {
 	struct io_ring_ctx *ctx = rsrc_data->ctx;
 
-	if (prsrc->tag) {
-		if (ctx->flags & IORING_SETUP_IOPOLL) {
-			mutex_lock(&ctx->uring_lock);
-			io_post_aux_cqe(ctx, prsrc->tag, 0, 0);
-			mutex_unlock(&ctx->uring_lock);
-		} else {
-			io_post_aux_cqe(ctx, prsrc->tag, 0, 0);
-		}
-	}
+	if (prsrc->tag)
+		io_post_aux_cqe(ctx, prsrc->tag, 0, 0);
 	rsrc_data->do_put(ctx, prsrc);
 }
 
@@ -176,32 +169,6 @@  static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
 		complete(&rsrc_data->done);
 }
 
-void io_rsrc_put_work(struct work_struct *work)
-{
-	struct io_ring_ctx *ctx;
-	struct llist_node *node;
-
-	ctx = container_of(work, struct io_ring_ctx, rsrc_put_work.work);
-	node = llist_del_all(&ctx->rsrc_put_llist);
-
-	while (node) {
-		struct io_rsrc_node *ref_node;
-		struct llist_node *next = node->next;
-
-		ref_node = llist_entry(node, struct io_rsrc_node, llist);
-		__io_rsrc_put_work(ref_node);
-		node = next;
-	}
-}
-
-void io_rsrc_put_tw(struct callback_head *cb)
-{
-	struct io_ring_ctx *ctx = container_of(cb, struct io_ring_ctx,
-					       rsrc_put_tw);
-
-	io_rsrc_put_work(&ctx->rsrc_put_work.work);
-}
-
 void io_wait_rsrc_data(struct io_rsrc_data *data)
 {
 	if (data && !atomic_dec_and_test(&data->refs))
@@ -217,34 +184,18 @@  void io_rsrc_node_ref_zero(struct io_rsrc_node *node)
 	__must_hold(&node->rsrc_data->ctx->uring_lock)
 {
 	struct io_ring_ctx *ctx = node->rsrc_data->ctx;
-	bool first_add = false;
-	unsigned long delay = HZ;
 
 	node->done = true;
-
-	/* if we are mid-quiesce then do not delay */
-	if (node->rsrc_data->quiesce)
-		delay = 0;
-
 	while (!list_empty(&ctx->rsrc_ref_list)) {
 		node = list_first_entry(&ctx->rsrc_ref_list,
 					    struct io_rsrc_node, node);
 		/* recycle ref nodes in order */
 		if (!node->done)
 			break;
-		list_del(&node->node);
-		first_add |= llist_add(&node->llist, &ctx->rsrc_put_llist);
-	}
 
-	if (!first_add)
-		return;
-
-	if (ctx->submitter_task) {
-		if (!task_work_add(ctx->submitter_task, &ctx->rsrc_put_tw,
-				   ctx->notify_method))
-			return;
+		list_del(&node->node);
+		__io_rsrc_put_work(node);
 	}
-	mod_delayed_work(system_wq, &ctx->rsrc_put_work, delay);
 }
 
 static struct io_rsrc_node *io_rsrc_node_alloc(void)
@@ -320,13 +271,11 @@  __cold static int io_rsrc_ref_quiesce(struct io_rsrc_data *data,
 		if (ret < 0) {
 			atomic_inc(&data->refs);
 			/* wait for all works potentially completing data->done */
-			flush_delayed_work(&ctx->rsrc_put_work);
 			reinit_completion(&data->done);
 			mutex_lock(&ctx->uring_lock);
 			break;
 		}
 
-		flush_delayed_work(&ctx->rsrc_put_work);
 		ret = wait_for_completion_interruptible(&data->done);
 		if (!ret) {
 			mutex_lock(&ctx->uring_lock);