From: Pavel Begunkov <asml.silence@gmail.com>
To: io-uring@vger.kernel.org
Cc: asml.silence@gmail.com
Subject: [PATCH v2 5/7] io_uring: separate lock for protecting overflow list
Date: Sat, 17 May 2025 13:27:41 +0100 [thread overview]
Message-ID: <396cf940ec7788299dd0dc278416295ea94ae277.1747483784.git.asml.silence@gmail.com> (raw)
In-Reply-To: <cover.1747483784.git.asml.silence@gmail.com>
Introduce ->overflow_lock to protect all overflow-related ctx fields. With
that, the caller is allowed, but no longer always required, to hold the
completion lock while posting overflow CQEs.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
include/linux/io_uring_types.h | 1 +
io_uring/io_uring.c | 32 ++++++++++++--------------------
2 files changed, 13 insertions(+), 20 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 00dbd7cd0e7d..e11ab9d19877 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -370,6 +370,7 @@ struct io_ring_ctx {
spinlock_t completion_lock;
struct list_head cq_overflow_list;
+ spinlock_t overflow_lock;
struct hlist_head waitid_list;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index a2a4e1319033..86b39a01a136 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -350,6 +350,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
init_waitqueue_head(&ctx->cq_wait);
init_waitqueue_head(&ctx->poll_wq);
spin_lock_init(&ctx->completion_lock);
+ spin_lock_init(&ctx->overflow_lock);
raw_spin_lock_init(&ctx->timeout_lock);
INIT_WQ_LIST(&ctx->iopoll_list);
INIT_LIST_HEAD(&ctx->defer_list);
@@ -624,6 +625,8 @@ static bool io_flush_overflow_list(struct io_ring_ctx *ctx, bool dying)
if (ctx->flags & IORING_SETUP_CQE32)
cqe_size <<= 1;
+ guard(spinlock)(&ctx->overflow_lock);
+
while (!list_empty(&ctx->cq_overflow_list)) {
struct io_uring_cqe *cqe;
struct io_overflow_cqe *ocqe;
@@ -733,8 +736,6 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
size_t ocq_size = sizeof(struct io_overflow_cqe);
bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32);
- lockdep_assert_held(&ctx->completion_lock);
-
if (is_cqe32)
ocq_size += sizeof(struct io_uring_cqe);
@@ -750,6 +751,9 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
}
trace_io_uring_cqe_overflow(ctx, user_data, res, cflags, ocqe);
+
+ guard(spinlock)(&ctx->overflow_lock);
+
if (!ocqe) {
struct io_rings *r = ctx->rings;
@@ -849,11 +853,9 @@ void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
lockdep_assert_held(&ctx->uring_lock);
lockdep_assert(ctx->lockless_cq);
- if (!io_fill_cqe_aux(ctx, user_data, res, cflags)) {
- spin_lock(&ctx->completion_lock);
+ if (!io_fill_cqe_aux(ctx, user_data, res, cflags))
io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
- spin_unlock(&ctx->completion_lock);
- }
+
ctx->submit_state.cq_flush = true;
}
@@ -1442,20 +1444,10 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
*/
if (!(req->flags & (REQ_F_CQE_SKIP | REQ_F_REISSUE)) &&
unlikely(!io_fill_cqe_req(ctx, req))) {
- if (ctx->lockless_cq) {
- spin_lock(&ctx->completion_lock);
- io_cqring_event_overflow(req->ctx, req->cqe.user_data,
- req->cqe.res, req->cqe.flags,
- req->big_cqe.extra1,
- req->big_cqe.extra2);
- spin_unlock(&ctx->completion_lock);
- } else {
- io_cqring_event_overflow(req->ctx, req->cqe.user_data,
- req->cqe.res, req->cqe.flags,
- req->big_cqe.extra1,
- req->big_cqe.extra2);
- }
-
+ io_cqring_event_overflow(req->ctx, req->cqe.user_data,
+ req->cqe.res, req->cqe.flags,
+ req->big_cqe.extra1,
+ req->big_cqe.extra2);
memset(&req->big_cqe, 0, sizeof(req->big_cqe));
}
}
--
2.49.0
next prev parent reply other threads:[~2025-05-17 12:26 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-17 12:27 [PATCH v2 0/7] simplify overflow CQE handling Pavel Begunkov
2025-05-17 12:27 ` [PATCH v2 1/7] io_uring: fix overflow resched cqe reordering Pavel Begunkov
2025-05-17 12:27 ` [PATCH v2 2/7] io_uring: init overflow entry before passing to tracing Pavel Begunkov
2025-05-17 12:27 ` [PATCH v2 3/7] io_uring: open code io_req_cqe_overflow() Pavel Begunkov
2025-05-17 12:27 ` [PATCH v2 4/7] io_uring: split __io_cqring_overflow_flush() Pavel Begunkov
2025-05-17 12:27 ` Pavel Begunkov [this message]
2025-05-17 12:27 ` [PATCH v2 6/7] io_uring: avoid GFP_ATOMIC for overflows if possible Pavel Begunkov
2025-05-17 12:27 ` [PATCH v2 7/7] io_uring: add lockdep warning for overflow posting Pavel Begunkov
2025-05-21 13:02 ` (subset) [PATCH v2 0/7] simplify overflow CQE handling Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=396cf940ec7788299dd0dc278416295ea94ae277.1747483784.git.asml.silence@gmail.com \
--to=asml.silence@gmail.com \
--cc=io-uring@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox