* [PATCH 0/4] Cache tctx cancelation state in the ctx
@ 2023-02-17 15:55 Jens Axboe
From: Jens Axboe @ 2023-02-17 15:55 UTC
To: io-uring
Hi,
One of the more expensive parts of io_req_local_work_add() is that it
has to pull in the remote task's tctx just to check for the very unlikely
event that we are in a cancelation state.
Cache the cancelation state in each ctx instead. This makes the marking
of cancelation (and clearing) a bit more expensive, but those are very
slow path operations. The upside is that io_req_local_work_add()
becomes a lot cheaper, which is what we care about.
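For illustration, this is roughly what the hot path check ends up looking
like before and after the series (a simplified sketch, not the literal
diffs - those are in patches 3 and 4):
	/* current: two dependent loads off the remote task's tctx */
	if (unlikely(atomic_read(&req->task->io_uring->in_idle)))
		io_move_task_work_from_local(ctx);
	/* with this series: test a bit cached locally in the ctx */
	if (unlikely(test_bit(0, &ctx->in_cancel)))
		io_move_task_work_from_local(ctx);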
--
Jens Axboe
* [PATCH 1/4] io_uring: consolidate the put_ref-and-return section of adding work
From: Jens Axboe @ 2023-02-17 15:55 UTC
To: io-uring; +Cc: Jens Axboe
We've got a few cases of this put-the-ref-and-return pattern; move them
to a single section and just use gotos to get there. This reduces the
text section size on both arm64 and x86-64, using gcc-12.2.
Signed-off-by: Jens Axboe <[email protected]>
---
io_uring/io_uring.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 3b915deb4d08..cbe06deb84ff 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1285,17 +1285,15 @@ static void io_req_local_work_add(struct io_kiocb *req)
percpu_ref_get(&ctx->refs);
- if (!llist_add(&req->io_task_work.node, &ctx->work_llist)) {
- percpu_ref_put(&ctx->refs);
- return;
- }
+ if (!llist_add(&req->io_task_work.node, &ctx->work_llist))
+ goto put_ref;
+
/* needed for the following wake up */
smp_mb__after_atomic();
if (unlikely(atomic_read(&req->task->io_uring->in_idle))) {
io_move_task_work_from_local(ctx);
- percpu_ref_put(&ctx->refs);
- return;
+ goto put_ref;
}
if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
@@ -1305,6 +1303,8 @@ static void io_req_local_work_add(struct io_kiocb *req)
if (READ_ONCE(ctx->cq_waiting))
wake_up_state(ctx->submitter_task, TASK_INTERRUPTIBLE);
+
+put_ref:
percpu_ref_put(&ctx->refs);
}
--
2.39.1
* [PATCH 2/4] io_uring: rename 'in_idle' to 'in_cancel'
From: Jens Axboe @ 2023-02-17 15:55 UTC
To: io-uring; +Cc: Jens Axboe
This better describes what it does - it's incremented when the task is
currently undergoing a cancelation operation, due to exiting or exec'ing.
Signed-off-by: Jens Axboe <[email protected]>
---
include/linux/io_uring_types.h | 2 +-
io_uring/io_uring.c | 18 +++++++++---------
io_uring/tctx.c | 2 +-
3 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 0efe4d784358..00689c12f6ab 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -58,7 +58,7 @@ struct io_uring_task {
struct xarray xa;
struct wait_queue_head wait;
- atomic_t in_idle;
+ atomic_t in_cancel;
atomic_t inflight_tracked;
struct percpu_counter inflight;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index cbe06deb84ff..64e07df034d1 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -719,7 +719,7 @@ static void io_put_task_remote(struct task_struct *task, int nr)
struct io_uring_task *tctx = task->io_uring;
percpu_counter_sub(&tctx->inflight, nr);
- if (unlikely(atomic_read(&tctx->in_idle)))
+ if (unlikely(atomic_read(&tctx->in_cancel)))
wake_up(&tctx->wait);
put_task_struct_many(task, nr);
}
@@ -1258,8 +1258,8 @@ void tctx_task_work(struct callback_head *cb)
ctx_flush_and_put(ctx, &uring_locked);
- /* relaxed read is enough as only the task itself sets ->in_idle */
- if (unlikely(atomic_read(&tctx->in_idle)))
+ /* relaxed read is enough as only the task itself sets ->in_cancel */
+ if (unlikely(atomic_read(&tctx->in_cancel)))
io_uring_drop_tctx_refs(current);
trace_io_uring_task_work_run(tctx, count, loops);
@@ -1291,7 +1291,7 @@ static void io_req_local_work_add(struct io_kiocb *req)
/* needed for the following wake up */
smp_mb__after_atomic();
- if (unlikely(atomic_read(&req->task->io_uring->in_idle))) {
+ if (unlikely(atomic_read(&req->task->io_uring->in_cancel))) {
io_move_task_work_from_local(ctx);
goto put_ref;
}
@@ -2937,12 +2937,12 @@ static __cold void io_tctx_exit_cb(struct callback_head *cb)
work = container_of(cb, struct io_tctx_exit, task_work);
/*
- * When @in_idle, we're in cancellation and it's racy to remove the
+ * When @in_cancel, we're in cancellation and it's racy to remove the
* node. It'll be removed by the end of cancellation, just ignore it.
* tctx can be NULL if the queueing of this task_work raced with
* work cancelation off the exec path.
*/
- if (tctx && !atomic_read(&tctx->in_idle))
+ if (tctx && !atomic_read(&tctx->in_cancel))
io_uring_del_tctx_node((unsigned long)work->ctx);
complete(&work->completion);
}
@@ -3210,7 +3210,7 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
if (tctx->io_wq)
io_wq_exit_start(tctx->io_wq);
- atomic_inc(&tctx->in_idle);
+ atomic_inc(&tctx->in_cancel);
do {
bool loop = false;
@@ -3261,9 +3261,9 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
if (cancel_all) {
/*
* We shouldn't run task_works after cancel, so just leave
- * ->in_idle set for normal exit.
+ * ->in_cancel set for normal exit.
*/
- atomic_dec(&tctx->in_idle);
+ atomic_dec(&tctx->in_cancel);
/* for exec all current's requests should be gone, kill tctx */
__io_uring_free(current);
}
diff --git a/io_uring/tctx.c b/io_uring/tctx.c
index 4324b1cf1f6a..3a8d1dd97e1b 100644
--- a/io_uring/tctx.c
+++ b/io_uring/tctx.c
@@ -83,7 +83,7 @@ __cold int io_uring_alloc_task_context(struct task_struct *task,
xa_init(&tctx->xa);
init_waitqueue_head(&tctx->wait);
- atomic_set(&tctx->in_idle, 0);
+ atomic_set(&tctx->in_cancel, 0);
atomic_set(&tctx->inflight_tracked, 0);
task->io_uring = tctx;
init_llist_head(&tctx->task_list);
--
2.39.1
* [PATCH 3/4] io_uring: cache task cancelation state in the ctx
From: Jens Axboe @ 2023-02-17 15:55 UTC
To: io-uring; +Cc: Jens Axboe
It can be quite expensive for the fast paths to dereference
req->task->io_uring->in_cancel for the (very) unlikely scenario that
we're currently undergoing cancelations.
Add a ctx bit to indicate if we're currently canceling or not, so that
the hot path may check this rather than dip into the remote task
state.
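To spell out the intended protocol (illustration only, not part of the
diff below): the ctx bit mirrors "this task has a cancelation in flight",
so it is set when the tctx counter goes from 0 to 1 and cleared again
once the counter drops back to 0, roughly:
	/* first canceler sets the cached bit (on each ctx of this tctx) */
	if (atomic_inc_return(&tctx->in_cancel) == 1)
		set_bit(0, &ctx->in_cancel);
	/* last canceler clears it again */
	if (atomic_dec_return(&tctx->in_cancel) == 0)
		clear_bit(0, &ctx->in_cancel);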
Signed-off-by: Jens Axboe <[email protected]>
---
include/linux/io_uring_types.h | 2 ++
io_uring/io_uring.c | 44 ++++++++++++++++++++++++++++++++--
2 files changed, 44 insertions(+), 2 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 00689c12f6ab..42d704adb9c6 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -211,6 +211,8 @@ struct io_ring_ctx {
enum task_work_notify_mode notify_method;
struct io_rings *rings;
struct task_struct *submitter_task;
+ /* local ctx cache of task cancel state */
+ unsigned long in_cancel;
struct percpu_ref refs;
} ____cacheline_aligned_in_smp;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 64e07df034d1..0fcb532db1fc 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3192,6 +3192,46 @@ static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
return percpu_counter_sum(&tctx->inflight);
}
+static __cold void io_uring_dec_cancel(struct io_uring_task *tctx,
+ struct io_sq_data *sqd)
+{
+ if (atomic_dec_return(&tctx->in_cancel))
+ return;
+
+ if (!sqd) {
+ struct io_tctx_node *node;
+ unsigned long index;
+
+ xa_for_each(&tctx->xa, index, node)
+ clear_bit(0, &node->ctx->in_cancel);
+ } else {
+ struct io_ring_ctx *ctx;
+
+ list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
+ clear_bit(0, &ctx->in_cancel);
+ }
+}
+
+static __cold void io_uring_inc_cancel(struct io_uring_task *tctx,
+ struct io_sq_data *sqd)
+{
+ if (atomic_inc_return(&tctx->in_cancel) != 1)
+ return;
+
+ if (!sqd) {
+ struct io_tctx_node *node;
+ unsigned long index;
+
+ xa_for_each(&tctx->xa, index, node)
+ set_bit(0, &node->ctx->in_cancel);
+ } else {
+ struct io_ring_ctx *ctx;
+
+ list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
+ set_bit(0, &ctx->in_cancel);
+ }
+}
+
/*
* Find any io_uring ctx that this task has registered or done IO on, and cancel
* requests. @sqd should be not-null IFF it's an SQPOLL thread cancellation.
@@ -3210,7 +3250,7 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
if (tctx->io_wq)
io_wq_exit_start(tctx->io_wq);
- atomic_inc(&tctx->in_cancel);
+ io_uring_inc_cancel(tctx, sqd);
do {
bool loop = false;
@@ -3263,7 +3303,7 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
* We shouldn't run task_works after cancel, so just leave
* ->in_cancel set for normal exit.
*/
- atomic_dec(&tctx->in_cancel);
+ io_uring_dec_cancel(tctx, sqd);
/* for exec all current's requests should be gone, kill tctx */
__io_uring_free(current);
}
--
2.39.1
* [PATCH 4/4] io_uring: use local ctx cancelation state for io_req_local_work_add()
From: Jens Axboe @ 2023-02-17 15:56 UTC
To: io-uring; +Cc: Jens Axboe
Rather than look at the remote task state through multiple layers of
memory dereferences, just use the locally cached ctx state instead.
Signed-off-by: Jens Axboe <[email protected]>
---
io_uring/io_uring.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 0fcb532db1fc..792ab44393c2 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1291,7 +1291,7 @@ static void io_req_local_work_add(struct io_kiocb *req)
/* needed for the following wake up */
smp_mb__after_atomic();
- if (unlikely(atomic_read(&req->task->io_uring->in_cancel))) {
+ if (unlikely(test_bit(0, &ctx->in_cancel))) {
io_move_task_work_from_local(ctx);
goto put_ref;
}
--
2.39.1
* Re: [PATCH 3/4] io_uring: cache task cancelation state in the ctx
From: Gabriel Krisman Bertazi @ 2023-02-20 14:43 UTC
To: Jens Axboe; +Cc: io-uring
Jens Axboe <[email protected]> writes:
> It can be quite expensive for the fast paths to dereference
> req->task->io_uring->in_cancel for the (very) unlikely scenario that
> we're currently undergoing cancelations.
>
> Add a ctx bit to indicate if we're currently canceling or not, so that
> the hot path may check this rather than dip into the remote task
> state.
>
> Signed-off-by: Jens Axboe <[email protected]>
> ---
> include/linux/io_uring_types.h | 2 ++
> io_uring/io_uring.c | 44 ++++++++++++++++++++++++++++++++--
> 2 files changed, 44 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
> index 00689c12f6ab..42d704adb9c6 100644
> --- a/include/linux/io_uring_types.h
> +++ b/include/linux/io_uring_types.h
> @@ -211,6 +211,8 @@ struct io_ring_ctx {
> enum task_work_notify_mode notify_method;
> struct io_rings *rings;
> struct task_struct *submitter_task;
> + /* local ctx cache of task cancel state */
> + unsigned long in_cancel;
Minor nit: even though the tctx side is a full counter, the cached value
only needs a single bit - it could just be a bitfield alongside the many
others in this structure, since you only care whether it is zero or
non-zero when peeking the cache.
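Something along these lines is what I had in mind - completely untested,
just to illustrate the idea. The set/clear side would then be a plain
store rather than set_bit()/clear_bit() (which want a full unsigned long):
	/* in io_ring_ctx, next to the existing bitfields */
	unsigned int		in_cancel: 1;
	/* hot path would then just do: if (unlikely(ctx->in_cancel)) ... */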
Either way,
Reviewed-by: Gabriel Krisman Bertazi <[email protected]>
> struct percpu_ref refs;
> } ____cacheline_aligned_in_smp;
>
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index 64e07df034d1..0fcb532db1fc 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -3192,6 +3192,46 @@ static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
> return percpu_counter_sum(&tctx->inflight);
> }
>
> +static __cold void io_uring_dec_cancel(struct io_uring_task *tctx,
> + struct io_sq_data *sqd)
> +{
> + if (atomic_dec_return(&tctx->in_cancel))
> + return;
> +
> + if (!sqd) {
> + struct io_tctx_node *node;
> + unsigned long index;
> +
> + xa_for_each(&tctx->xa, index, node)
> + clear_bit(0, &node->ctx->in_cancel);
> + } else {
> + struct io_ring_ctx *ctx;
> +
> + list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
> + clear_bit(0, &ctx->in_cancel);
> + }
> +}
> +
> +static __cold void io_uring_inc_cancel(struct io_uring_task *tctx,
> + struct io_sq_data *sqd)
> +{
> + if (atomic_inc_return(&tctx->in_cancel) != 1)
> + return;
> +
> + if (!sqd) {
> + struct io_tctx_node *node;
> + unsigned long index;
> +
> + xa_for_each(&tctx->xa, index, node)
> + set_bit(0, &node->ctx->in_cancel);
> + } else {
> + struct io_ring_ctx *ctx;
> +
> + list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
> + set_bit(0, &ctx->in_cancel);
> + }
> +}
> +
> /*
> * Find any io_uring ctx that this task has registered or done IO on, and cancel
> * requests. @sqd should be not-null IFF it's an SQPOLL thread cancellation.
> @@ -3210,7 +3250,7 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
> if (tctx->io_wq)
> io_wq_exit_start(tctx->io_wq);
>
> - atomic_inc(&tctx->in_cancel);
> + io_uring_inc_cancel(tctx, sqd);
> do {
> bool loop = false;
>
> @@ -3263,7 +3303,7 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
> * We shouldn't run task_works after cancel, so just leave
> * ->in_cancel set for normal exit.
> */
> - atomic_dec(&tctx->in_cancel);
> + io_uring_dec_cancel(tctx, sqd);
> /* for exec all current's requests should be gone, kill tctx */
> __io_uring_free(current);
> }
--
Gabriel Krisman Bertazi