From: Jens Axboe <axboe@kernel.dk>
To: io-uring@vger.kernel.org
Cc: dvyukov@google.com, csander@purestorage.com, krisman@suse.de,
Jens Axboe <axboe@kernel.dk>
Subject: [PATCH 6/6] io_uring: remove the per-ctx fallback task_work machinery
Date: Thu, 11 Jun 2026 20:48:32 -0600 [thread overview]
Message-ID: <20260612025125.1690253-7-axboe@kernel.dk> (raw)
In-Reply-To: <20260612025125.1690253-1-axboe@kernel.dk>
With the tctx fallback running its entries directly, the per-ctx
fallback work has a single user left: moving local (DEFER_TASKRUN)
task_work entries out of a ring that is going away. Both of its call
sites are process context and don't hold ->uring_lock, the same
conditions the deferred fallback work itself ran under - so run the
entries in cancel mode right there instead, and rename the helper to
io_cancel_local_task_work() to match what it now does.
With that, ->fallback_llist, ->fallback_work, io_fallback_req_func()
and __io_fallback_tw() can all go away, along with the fallback work
flushing in the ring exit and cancel paths. Requests that get
orphaned by an exiting task now run via the tctx fallback work, which
the ring exit side implicitly waits on through the ctx refs those
requests hold.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
include/linux/io_uring_types.h | 2 -
io_uring/cancel.c | 2 -
io_uring/io_uring.c | 7 +---
io_uring/tw.c | 67 +++++++---------------------------
io_uring/tw.h | 3 +-
5 files changed, 16 insertions(+), 65 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 33de451127f9..a0de8dafd990 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -498,8 +498,6 @@ struct io_ring_ctx {
struct mutex tctx_lock;
/* ctx exit and cancelation */
- struct llist_head fallback_llist;
- struct delayed_work fallback_work;
struct work_struct exit_work;
struct completion ref_comp;
diff --git a/io_uring/cancel.c b/io_uring/cancel.c
index 4aa3103ba9c3..8c6fa6f367e4 100644
--- a/io_uring/cancel.c
+++ b/io_uring/cancel.c
@@ -565,8 +565,6 @@ __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
mutex_unlock(&ctx->uring_lock);
if (tctx)
ret |= io_run_task_work() > 0;
- else
- ret |= flush_delayed_work(&ctx->fallback_work);
return ret;
}
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 16acd99ff083..33b4340d32a7 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -289,7 +289,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
#ifdef CONFIG_FUTEX
INIT_HLIST_HEAD(&ctx->futex_list);
#endif
- INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func);
INIT_WQ_LIST(&ctx->submit_state.compl_reqs);
INIT_HLIST_HEAD(&ctx->cancelable_uring_cmd);
io_napi_init(ctx);
@@ -1204,7 +1203,7 @@ __cold void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
mutex_unlock(&ctx->uring_lock);
if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
- io_move_task_work_from_local(ctx);
+ io_cancel_local_task_work(ctx);
}
static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned int min_events)
@@ -2350,7 +2349,7 @@ static __cold void io_ring_exit_work(struct work_struct *work)
/* The SQPOLL thread never reaches this path */
do {
if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
- io_move_task_work_from_local(ctx);
+ io_cancel_local_task_work(ctx);
cond_resched();
} while (io_uring_try_cancel_requests(ctx, NULL, true, false));
@@ -2436,8 +2435,6 @@ static __cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
io_unregister_personality(ctx, index);
mutex_unlock(&ctx->uring_lock);
- flush_delayed_work(&ctx->fallback_work);
-
INIT_WORK(&ctx->exit_work, io_ring_exit_work);
/*
* Use system_dfl_wq to avoid spawning tons of event kworkers
diff --git a/io_uring/tw.c b/io_uring/tw.c
index 0fa685aa3926..31f9feb42353 100644
--- a/io_uring/tw.c
+++ b/io_uring/tw.c
@@ -16,24 +16,6 @@
#include "wait.h"
#include "mpscq.h"
-void io_fallback_req_func(struct work_struct *work)
-{
- struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx,
- fallback_work.work);
- struct llist_node *node = llist_del_all(&ctx->fallback_llist);
- struct io_kiocb *req, *tmp;
- struct io_tw_state ts = {};
-
- percpu_ref_get(&ctx->refs);
- mutex_lock(&ctx->uring_lock);
- ts.cancel = io_should_terminate_tw(ctx);
- llist_for_each_entry_safe(req, tmp, node, io_task_work.node)
- req->io_task_work.func((struct io_tw_req){req}, ts);
- io_submit_flush_completions(ctx);
- mutex_unlock(&ctx->uring_lock);
- percpu_ref_put(&ctx->refs);
-}
-
static void ctx_flush_and_put(struct io_ring_ctx *ctx, io_tw_token_t tw)
{
if (!ctx)
@@ -46,34 +28,6 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx, io_tw_token_t tw)
percpu_ref_put(&ctx->refs);
}
-static __cold void __io_fallback_tw(struct llist_node *node, bool sync)
-{
- struct io_ring_ctx *last_ctx = NULL;
- struct io_kiocb *req;
-
- while (node) {
- req = container_of(node, struct io_kiocb, io_task_work.node);
- node = node->next;
- if (last_ctx != req->ctx) {
- if (last_ctx) {
- if (sync)
- flush_delayed_work(&last_ctx->fallback_work);
- percpu_ref_put(&last_ctx->refs);
- }
- last_ctx = req->ctx;
- percpu_ref_get(&last_ctx->refs);
- }
- if (llist_add(&req->io_task_work.node, &last_ctx->fallback_llist))
- schedule_delayed_work(&last_ctx->fallback_work, 1);
- }
-
- if (last_ctx) {
- if (sync)
- flush_delayed_work(&last_ctx->fallback_work);
- percpu_ref_put(&last_ctx->refs);
- }
-}
-
void io_tctx_fallback_work(struct work_struct *work)
{
struct io_uring_task *tctx = container_of(work, struct io_uring_task,
@@ -278,29 +232,34 @@ void io_req_task_work_add_remote(struct io_kiocb *req, unsigned flags)
__io_req_task_work_add(req, flags);
}
-void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx)
+void __cold io_cancel_local_task_work(struct io_ring_ctx *ctx)
{
- struct llist_node *node, *first = NULL, **tail = &first;
+ struct io_tw_state ts = { .cancel = true };
+ struct llist_node *node;
/*
* The work list consumer side is serialized by ->uring_lock, see
* __io_run_local_work(). Grab it to guard against racing with normal
- * task_work running, as the task may be exiting.
+ * task_work running, as the task may be exiting. The ring is going
+ * away, so run the entries in cancel mode right here - the callers
+ * run in process context, just like the per-ctx fallback work that
+ * these entries were previously punted to.
*/
guard(mutex)(&ctx->uring_lock);
while (!mpscq_empty(&ctx->work_list)) {
+ struct io_kiocb *req;
+
node = mpscq_pop(&ctx->work_list, &ctx->work_head);
if (!node) {
/* a producer is mid-push, wait for it to link */
- cpu_relax();
+ cond_resched();
continue;
}
- *tail = node;
- tail = &node->next;
+ req = container_of(node, struct io_kiocb, io_task_work.node);
+ req->io_task_work.func((struct io_tw_req){req}, ts);
}
- *tail = NULL;
- __io_fallback_tw(first, false);
+ io_submit_flush_completions(ctx);
}
static bool io_run_local_work_continue(struct io_ring_ctx *ctx, int events,
diff --git a/io_uring/tw.h b/io_uring/tw.h
index 387e52004da8..3ade5ad577fd 100644
--- a/io_uring/tw.h
+++ b/io_uring/tw.h
@@ -30,8 +30,7 @@ void io_tctx_fallback_work(struct work_struct *work);
int io_run_local_work(struct io_ring_ctx *ctx, int min_events, int max_events);
int io_run_task_work_sig(struct io_ring_ctx *ctx);
-__cold void io_fallback_req_func(struct work_struct *work);
-__cold void io_move_task_work_from_local(struct io_ring_ctx *ctx);
+__cold void io_cancel_local_task_work(struct io_ring_ctx *ctx);
int io_run_local_work_locked(struct io_ring_ctx *ctx, int min_events);
void io_req_local_work_add(struct io_kiocb *req, unsigned flags);
--
2.53.0
prev parent reply other threads:[~2026-06-12 2:51 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-12 2:48 [PATCHSET v2] Add lockless MPSC FIFO queue for task work Jens Axboe
2026-06-12 2:48 ` [PATCH 1/6] io_uring: grab RCU read lock marking task run Jens Axboe
2026-06-13 2:27 ` Caleb Sander Mateos
2026-06-12 2:48 ` [PATCH 2/6] io_uring/mpscq: add lockless multi-producer, single-consumer FIFO queue Jens Axboe
2026-06-13 2:40 ` Caleb Sander Mateos
2026-06-13 12:22 ` Jens Axboe
2026-06-12 2:48 ` [PATCH 3/6] io_uring: switch local task_work to a mpscq Jens Axboe
2026-06-12 3:20 ` Caleb Sander Mateos
2026-06-12 12:23 ` Jens Axboe
2026-06-12 2:48 ` [PATCH 4/6] io_uring: switch normal " Jens Axboe
2026-06-12 18:59 ` Caleb Sander Mateos
2026-06-12 19:37 ` Jens Axboe
2026-06-13 2:26 ` Caleb Sander Mateos
2026-06-13 12:08 ` Jens Axboe
2026-06-15 18:33 ` Caleb Sander Mateos
2026-06-15 18:47 ` Jens Axboe
2026-06-15 20:04 ` Jens Axboe
2026-06-15 20:40 ` Caleb Sander Mateos
2026-06-15 21:51 ` Jens Axboe
2026-06-16 0:22 ` Caleb Sander Mateos
2026-06-12 2:48 ` [PATCH 5/6] io_uring: run the tctx task_work fallback directly Jens Axboe
2026-06-12 2:48 ` Jens Axboe [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260612025125.1690253-7-axboe@kernel.dk \
--to=axboe@kernel.dk \
--cc=csander@purestorage.com \
--cc=dvyukov@google.com \
--cc=io-uring@vger.kernel.org \
--cc=krisman@suse.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox