From: Pavel Begunkov <asml.silence@gmail.com>
To: io-uring@vger.kernel.org
Cc: asml.silence@gmail.com
Subject: [PATCH v2 5/7] io_uring: separate lock for protecting overflow list
Date: Sat, 17 May 2025 13:27:41 +0100
Message-ID: <396cf940ec7788299dd0dc278416295ea94ae277.1747483784.git.asml.silence@gmail.com>
In-Reply-To: <cover.1747483784.git.asml.silence@gmail.com>

Introduce ->overflow_lock to protect all overflow-related ctx fields.
With it in place, the caller is allowed, but not always required, to
hold the completion lock while posting an overflow CQE.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
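Not part of the change itself, just an illustrative note: the hunks
below rely on the scoped-lock guard() helper from <linux/cleanup.h>
(wired up for spinlock_t in <linux/spinlock.h>). A minimal sketch of
the pattern, with a made-up helper name, looks like this:

	#include <linux/spinlock.h>
	#include <linux/cleanup.h>
	#include <linux/list.h>

	/*
	 * Hypothetical helper, for illustration only. guard(spinlock)
	 * takes the lock here and releases it automatically when the
	 * enclosing scope ends, so there is no explicit unlock path.
	 */
	static void example_add_locked(spinlock_t *lock,
				       struct list_head *list,
				       struct list_head *node)
	{
		guard(spinlock)(lock);
		list_add_tail(node, list);
	}

With the guard in place, io_cqring_event_overflow() can take
->overflow_lock unconditionally, which is what lets the lockless_cq
special-casing in __io_submit_flush_completions() go away.
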
 include/linux/io_uring_types.h |  1 +
 io_uring/io_uring.c            | 32 ++++++++++++--------------------
 2 files changed, 13 insertions(+), 20 deletions(-)

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 00dbd7cd0e7d..e11ab9d19877 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -370,6 +370,7 @@ struct io_ring_ctx {
 	spinlock_t		completion_lock;
 
 	struct list_head	cq_overflow_list;
+	spinlock_t		overflow_lock;
 
 	struct hlist_head	waitid_list;
 
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index a2a4e1319033..86b39a01a136 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -350,6 +350,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	init_waitqueue_head(&ctx->cq_wait);
 	init_waitqueue_head(&ctx->poll_wq);
 	spin_lock_init(&ctx->completion_lock);
+	spin_lock_init(&ctx->overflow_lock);
 	raw_spin_lock_init(&ctx->timeout_lock);
 	INIT_WQ_LIST(&ctx->iopoll_list);
 	INIT_LIST_HEAD(&ctx->defer_list);
@@ -624,6 +625,8 @@ static bool io_flush_overflow_list(struct io_ring_ctx *ctx, bool dying)
 	if (ctx->flags & IORING_SETUP_CQE32)
 		cqe_size <<= 1;
 
+	guard(spinlock)(&ctx->overflow_lock);
+
 	while (!list_empty(&ctx->cq_overflow_list)) {
 		struct io_uring_cqe *cqe;
 		struct io_overflow_cqe *ocqe;
@@ -733,8 +736,6 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
 	size_t ocq_size = sizeof(struct io_overflow_cqe);
 	bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32);
 
-	lockdep_assert_held(&ctx->completion_lock);
-
 	if (is_cqe32)
 		ocq_size += sizeof(struct io_uring_cqe);
 
@@ -750,6 +751,9 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
 	}
 
 	trace_io_uring_cqe_overflow(ctx, user_data, res, cflags, ocqe);
+
+	guard(spinlock)(&ctx->overflow_lock);
+
 	if (!ocqe) {
 		struct io_rings *r = ctx->rings;
 
@@ -849,11 +853,9 @@ void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
 	lockdep_assert_held(&ctx->uring_lock);
 	lockdep_assert(ctx->lockless_cq);
 
-	if (!io_fill_cqe_aux(ctx, user_data, res, cflags)) {
-		spin_lock(&ctx->completion_lock);
+	if (!io_fill_cqe_aux(ctx, user_data, res, cflags))
 		io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
-		spin_unlock(&ctx->completion_lock);
-	}
+
 	ctx->submit_state.cq_flush = true;
 }
 
@@ -1442,20 +1444,10 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
 		 */
 		if (!(req->flags & (REQ_F_CQE_SKIP | REQ_F_REISSUE)) &&
 		    unlikely(!io_fill_cqe_req(ctx, req))) {
-			if (ctx->lockless_cq) {
-				spin_lock(&ctx->completion_lock);
-				io_cqring_event_overflow(req->ctx, req->cqe.user_data,
-							req->cqe.res, req->cqe.flags,
-							req->big_cqe.extra1,
-							req->big_cqe.extra2);
-				spin_unlock(&ctx->completion_lock);
-			} else {
-				io_cqring_event_overflow(req->ctx, req->cqe.user_data,
-							req->cqe.res, req->cqe.flags,
-							req->big_cqe.extra1,
-							req->big_cqe.extra2);
-			}
-
+			io_cqring_event_overflow(req->ctx, req->cqe.user_data,
+						req->cqe.res, req->cqe.flags,
+						req->big_cqe.extra1,
+						req->big_cqe.extra2);
 			memset(&req->big_cqe, 0, sizeof(req->big_cqe));
 		}
 	}
-- 
2.49.0


Thread overview: 9+ messages
2025-05-17 12:27 [PATCH v2 0/7] simplify overflow CQE handling Pavel Begunkov
2025-05-17 12:27 ` [PATCH v2 1/7] io_uring: fix overflow resched cqe reordering Pavel Begunkov
2025-05-17 12:27 ` [PATCH v2 2/7] io_uring: init overflow entry before passing to tracing Pavel Begunkov
2025-05-17 12:27 ` [PATCH v2 3/7] io_uring: open code io_req_cqe_overflow() Pavel Begunkov
2025-05-17 12:27 ` [PATCH v2 4/7] io_uring: split __io_cqring_overflow_flush() Pavel Begunkov
2025-05-17 12:27 ` Pavel Begunkov [this message]
2025-05-17 12:27 ` [PATCH v2 6/7] io_uring: avoid GFP_ATOMIC for overflows if possible Pavel Begunkov
2025-05-17 12:27 ` [PATCH v2 7/7] io_uring: add lockdep warning for overflow posting Pavel Begunkov
2025-05-21 13:02 ` (subset) [PATCH v2 0/7] simplify overflow CQE handling Jens Axboe
