From: Pavel Begunkov <[email protected]>
To: [email protected]
Cc: [email protected], Jens Axboe <[email protected]>,
[email protected], Kanchan Joshi <[email protected]>,
Ming Lei <[email protected]>
Subject: [PATCH v2 11/14] io_uring: get rid of intermediate aux cqe caches
Date: Mon, 18 Mar 2024 00:41:56 +0000 [thread overview]
Message-ID: <a23c620eaf39d5f303e5b1f3f1fa37693382621e.1710720150.git.asml.silence@gmail.com> (raw)
In-Reply-To: <[email protected]>
io_post_aux_cqe(), which is used for multishot requests, delays
completions by putting CQEs into a temporary array for the purpose of
completion lock/flush batching.
DEFER_TASKRUN doesn't need any locking, so for it we can put completions
directly into the CQ and defer post completion handling with a flag.
That leaves !DEFER_TASKRUN, which is not that interesting / hot for
multishot requests, so have conditional locking with deferred flush
for them.
Signed-off-by: Pavel Begunkov <[email protected]>
Link: https://lore.kernel.org/r/0eb3f55722540a11b036d3c90771220eb082d65e.1710514702.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <[email protected]>
---
include/linux/io_uring_types.h | 3 +-
io_uring/io_uring.c | 64 ++++++++--------------------------
2 files changed, 15 insertions(+), 52 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 5a2afbc93887..ea7e5488b3be 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -205,6 +205,7 @@ struct io_submit_state {
bool plug_started;
bool need_plug;
+ bool cq_flush;
unsigned short submit_nr;
unsigned int cqes_count;
struct blk_plug plug;
@@ -342,8 +343,6 @@ struct io_ring_ctx {
unsigned cq_last_tm_flush;
} ____cacheline_aligned_in_smp;
- struct io_uring_cqe completion_cqes[16];
-
spinlock_t completion_lock;
/* IRQ completion list, under ->completion_lock */
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 30542dda1473..8c485bcb5cb7 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -176,7 +176,7 @@ static struct ctl_table kernel_io_uring_disabled_table[] = {
static inline void io_submit_flush_completions(struct io_ring_ctx *ctx)
{
if (!wq_list_empty(&ctx->submit_state.compl_reqs) ||
- ctx->submit_state.cqes_count)
+ ctx->submit_state.cq_flush)
__io_submit_flush_completions(ctx);
}
@@ -636,6 +636,12 @@ static inline void __io_cq_lock(struct io_ring_ctx *ctx)
spin_lock(&ctx->completion_lock);
}
+static inline void __io_cq_unlock(struct io_ring_ctx *ctx)
+{
+ if (!ctx->lockless_cq)
+ spin_unlock(&ctx->completion_lock);
+}
+
static inline void io_cq_lock(struct io_ring_ctx *ctx)
__acquires(ctx->completion_lock)
{
@@ -888,31 +894,6 @@ static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
return false;
}
-static void __io_flush_post_cqes(struct io_ring_ctx *ctx)
- __must_hold(&ctx->uring_lock)
-{
- struct io_submit_state *state = &ctx->submit_state;
- unsigned int i;
-
- lockdep_assert_held(&ctx->uring_lock);
- for (i = 0; i < state->cqes_count; i++) {
- struct io_uring_cqe *cqe = &ctx->completion_cqes[i];
-
- if (!io_fill_cqe_aux(ctx, cqe->user_data, cqe->res, cqe->flags)) {
- if (ctx->lockless_cq) {
- spin_lock(&ctx->completion_lock);
- io_cqring_event_overflow(ctx, cqe->user_data,
- cqe->res, cqe->flags, 0, 0);
- spin_unlock(&ctx->completion_lock);
- } else {
- io_cqring_event_overflow(ctx, cqe->user_data,
- cqe->res, cqe->flags, 0, 0);
- }
- }
- }
- state->cqes_count = 0;
-}
-
bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
{
bool filled;
@@ -933,31 +914,16 @@ bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags
bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags)
{
struct io_ring_ctx *ctx = req->ctx;
- u64 user_data = req->cqe.user_data;
- struct io_uring_cqe *cqe;
+ bool posted;
lockdep_assert(!io_wq_current_is_worker());
lockdep_assert_held(&ctx->uring_lock);
- if (ctx->submit_state.cqes_count == ARRAY_SIZE(ctx->completion_cqes)) {
- __io_cq_lock(ctx);
- __io_flush_post_cqes(ctx);
- /* no need to flush - flush is deferred */
- __io_cq_unlock_post(ctx);
- }
-
- /* For defered completions this is not as strict as it is otherwise,
- * however it's main job is to prevent unbounded posted completions,
- * and in that it works just as well.
- */
- if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq))
- return false;
-
- cqe = &ctx->completion_cqes[ctx->submit_state.cqes_count++];
- cqe->user_data = user_data;
- cqe->res = res;
- cqe->flags = cflags;
- return true;
+ __io_cq_lock(ctx);
+ posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags);
+ ctx->submit_state.cq_flush = true;
+ __io_cq_unlock_post(ctx);
+ return posted;
}
static void __io_req_complete_post(struct io_kiocb *req, unsigned issue_flags)
@@ -1551,9 +1517,6 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
struct io_wq_work_node *node;
__io_cq_lock(ctx);
- /* must come first to preserve CQE ordering in failure cases */
- if (state->cqes_count)
- __io_flush_post_cqes(ctx);
__wq_list_for_each(node, &state->compl_reqs) {
struct io_kiocb *req = container_of(node, struct io_kiocb,
comp_list);
@@ -1575,6 +1538,7 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
io_free_batch_list(ctx, state->compl_reqs.first);
INIT_WQ_LIST(&state->compl_reqs);
}
+ ctx->submit_state.cq_flush = false;
}
static unsigned io_cqring_events(struct io_ring_ctx *ctx)
--
2.44.0
next prev parent reply other threads:[~2024-03-18 0:43 UTC|newest]
Thread overview: 41+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-03-18 0:41 [PATCH v2 00/14] remove aux CQE caches Pavel Begunkov
2024-03-18 0:41 ` [PATCH v2 01/14] io_uring/cmd: kill one issue_flags to tw conversion Pavel Begunkov
2024-03-18 0:41 ` [PATCH v2 02/14] io_uring/cmd: fix tw <-> issue_flags conversion Pavel Begunkov
2024-03-18 2:23 ` Ming Lei
2024-03-18 2:25 ` Jens Axboe
2024-03-18 2:32 ` Pavel Begunkov
2024-03-18 2:40 ` Jens Axboe
2024-03-18 2:43 ` Pavel Begunkov
2024-03-18 2:46 ` Jens Axboe
2024-03-18 2:47 ` Ming Lei
2024-03-18 3:11 ` Jens Axboe
2024-03-18 3:24 ` Pavel Begunkov
2024-03-18 6:59 ` Ming Lei
2024-03-18 11:45 ` Pavel Begunkov
2024-03-18 0:41 ` [PATCH v2 03/14] io_uring/cmd: make io_uring_cmd_done irq safe Pavel Begunkov
2024-03-18 8:10 ` Ming Lei
2024-03-18 11:50 ` Pavel Begunkov
2024-03-18 11:59 ` Ming Lei
2024-03-18 12:46 ` Pavel Begunkov
2024-03-18 13:09 ` Ming Lei
2024-03-18 0:41 ` [PATCH v2 04/14] io_uring/cmd: introduce io_uring_cmd_complete Pavel Begunkov
2024-03-18 0:41 ` [PATCH v2 05/14] ublk: don't hard code IO_URING_F_UNLOCKED Pavel Begunkov
2024-03-18 8:16 ` Ming Lei
2024-03-18 12:52 ` Pavel Begunkov
2024-03-18 13:37 ` Pavel Begunkov
2024-03-18 14:32 ` Pavel Begunkov
2024-03-18 14:39 ` Ming Lei
2024-03-18 14:34 ` Ming Lei
2024-03-18 15:08 ` Pavel Begunkov
2024-03-18 15:16 ` Ming Lei
2024-03-18 0:41 ` [PATCH v2 06/14] nvme/io_uring: " Pavel Begunkov
2024-03-18 13:26 ` Kanchan Joshi
2024-03-18 13:38 ` Pavel Begunkov
2024-03-18 0:41 ` [PATCH v2 07/14] io_uring/rw: avoid punting to io-wq directly Pavel Begunkov
2024-03-18 0:41 ` [PATCH v2 08/14] io_uring: force tw ctx locking Pavel Begunkov
2024-03-18 0:41 ` [PATCH v2 09/14] io_uring: remove struct io_tw_state::locked Pavel Begunkov
2024-03-18 0:41 ` [PATCH v2 10/14] io_uring: refactor io_fill_cqe_req_aux Pavel Begunkov
2024-03-18 0:41 ` Pavel Begunkov [this message]
2024-03-18 0:41 ` [PATCH v2 12/14] io_uring: remove current check from complete_post Pavel Begunkov
2024-03-18 0:41 ` [PATCH v2 13/14] io_uring: refactor io_req_complete_post() Pavel Begunkov
2024-03-18 0:41 ` [PATCH v2 14/14] io_uring: clean up io_lockdep_assert_cq_locked Pavel Begunkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=a23c620eaf39d5f303e5b1f3f1fa37693382621e.1710720150.git.asml.silence@gmail.com \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox