From: Jens Axboe <axboe@kernel.dk>
To: Pavel Begunkov <asml.silence@gmail.com>, io-uring@vger.kernel.org
Subject: Re: [PATCH 2/4] io_uring: move locking inside overflow posting
Date: Wed, 14 May 2025 15:52:21 -0600
Message-ID: <a645a6a2-d722-4ef1-bdfd-3701ab315584@kernel.dk>
In-Reply-To: <1644225f-36c9-4abf-8da3-cc853cdab0e8@kernel.dk>

Since it's sometimes easier to talk in code than in English: I think
something like the below (just applied on top, and utterly untested) is
much cleaner. I didn't spend a lot of time on it, and I'm sure it could
be condensed down some more with a helper or something. But it keeps
the locking in the caller, while still allowing a GFP_KERNEL allocation
for lockless_cq.

Somewhat unrelated, but it also fills in an io_cqe and passes that in
for the non-cqe32 parts, just for readability's sake.


diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index da1075b66a87..9b6d09633a29 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -744,44 +744,12 @@ static bool __io_cqring_event_overflow(struct io_ring_ctx *ctx,
 	return true;
 }
 
-static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, bool locked,
-				     u64 user_data, s32 res, u32 cflags,
-				     u64 extra1, u64 extra2)
+static void io_cqring_event_overflow(struct io_ring_ctx *ctx,
+				     struct io_overflow_cqe *ocqe)
 {
-	struct io_overflow_cqe *ocqe;
-	size_t ocq_size = sizeof(struct io_overflow_cqe);
-	bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32);
-	gfp_t gfp = GFP_KERNEL_ACCOUNT;
-	bool queued;
-
-	io_lockdep_assert_cq_locked(ctx);
-
-	if (is_cqe32)
-		ocq_size += sizeof(struct io_uring_cqe);
-	if (locked)
-		gfp = GFP_ATOMIC | __GFP_ACCOUNT;
-
-	ocqe = kmalloc(ocq_size, gfp);
-	trace_io_uring_cqe_overflow(ctx, user_data, res, cflags, ocqe);
-
-	if (ocqe) {
-		ocqe->cqe.user_data = user_data;
-		ocqe->cqe.res = res;
-		ocqe->cqe.flags = cflags;
-		if (is_cqe32) {
-			ocqe->cqe.big_cqe[0] = extra1;
-			ocqe->cqe.big_cqe[1] = extra2;
-		}
-	}
-
-	if (locked) {
-		queued = __io_cqring_event_overflow(ctx, ocqe);
-	} else {
-		spin_lock(&ctx->completion_lock);
-		queued = __io_cqring_event_overflow(ctx, ocqe);
-		spin_unlock(&ctx->completion_lock);
-	}
-	return queued;
+	spin_lock(&ctx->completion_lock);
+	__io_cqring_event_overflow(ctx, ocqe);
+	spin_unlock(&ctx->completion_lock);
 }
 
 /*
@@ -842,15 +810,49 @@ static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
 	return false;
 }
 
+static struct io_overflow_cqe *io_get_ocqe(struct io_ring_ctx *ctx,
+					   struct io_cqe *cqe, u64 extra1,
+					   u64 extra2, gfp_t gfp)
+{
+	size_t ocq_size = sizeof(struct io_overflow_cqe);
+	bool is_cqe32 = ctx->flags & IORING_SETUP_CQE32;
+	struct io_overflow_cqe *ocqe;
+
+	if (is_cqe32)
+		ocq_size += sizeof(struct io_uring_cqe);
+
+	ocqe = kmalloc(ocq_size, gfp);
+	trace_io_uring_cqe_overflow(ctx, cqe->user_data, cqe->res, cqe->flags, ocqe);
+	if (ocqe) {
+		ocqe->cqe.user_data = cqe->user_data;
+		ocqe->cqe.res = cqe->res;
+		ocqe->cqe.flags = cqe->flags;
+		if (is_cqe32) {
+			ocqe->cqe.big_cqe[0] = extra1;
+			ocqe->cqe.big_cqe[1] = extra2;
+		}
+	}
+
+	return ocqe;
+}
+
 bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
 {
 	bool filled;
 
 	io_cq_lock(ctx);
 	filled = io_fill_cqe_aux(ctx, user_data, res, cflags);
-	if (!filled)
-		filled = io_cqring_event_overflow(ctx, true,
-						  user_data, res, cflags, 0, 0);
+	if (unlikely(!filled)) {
+		struct io_cqe cqe = {
+			.user_data	= user_data,
+			.res		= res,
+			.flags		= cflags
+		};
+		struct io_overflow_cqe *ocqe;
+
+		ocqe = io_get_ocqe(ctx, &cqe, 0, 0, GFP_ATOMIC);
+		filled = __io_cqring_event_overflow(ctx, ocqe);
+	}
 	io_cq_unlock_post(ctx);
 	return filled;
 }
@@ -864,8 +866,17 @@ void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
 	lockdep_assert_held(&ctx->uring_lock);
 	lockdep_assert(ctx->lockless_cq);
 
-	if (!io_fill_cqe_aux(ctx, user_data, res, cflags))
-		io_cqring_event_overflow(ctx, false, user_data, res, cflags, 0, 0);
+	if (!io_fill_cqe_aux(ctx, user_data, res, cflags)) {
+		struct io_cqe cqe = {
+			.user_data	= user_data,
+			.res		= res,
+			.flags		= cflags
+		};
+		struct io_overflow_cqe *ocqe;
+
+		ocqe = io_get_ocqe(ctx, &cqe, 0, 0, GFP_KERNEL);
+		io_cqring_event_overflow(ctx, ocqe);
+	}
 
 	ctx->submit_state.cq_flush = true;
 }
@@ -1437,6 +1448,20 @@ static void io_free_batch_list(struct io_ring_ctx *ctx,
 	} while (node);
 }
 
+static __cold void io_cqe_overflow_fill(struct io_ring_ctx *ctx,
+					struct io_kiocb *req)
+{
+	gfp_t gfp = ctx->lockless_cq ? GFP_KERNEL : GFP_ATOMIC;
+	struct io_overflow_cqe *ocqe;
+
+	ocqe = io_get_ocqe(ctx, &req->cqe, req->big_cqe.extra1, req->big_cqe.extra2, gfp);
+	if (ctx->lockless_cq)
+		io_cqring_event_overflow(ctx, ocqe);
+	else
+		__io_cqring_event_overflow(ctx, ocqe);
+	memset(&req->big_cqe, 0, sizeof(req->big_cqe));
+}
+
 void __io_submit_flush_completions(struct io_ring_ctx *ctx)
 	__must_hold(&ctx->uring_lock)
 {
@@ -1453,17 +1478,10 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
 		 * will go through the io-wq retry machinery and post one
 		 * later.
 		 */
-		if (!(req->flags & (REQ_F_CQE_SKIP | REQ_F_REISSUE)) &&
-		    unlikely(!io_fill_cqe_req(ctx, req))) {
-			bool locked = !ctx->lockless_cq;
-
-			io_cqring_event_overflow(req->ctx, locked,
-						req->cqe.user_data,
-						req->cqe.res, req->cqe.flags,
-						req->big_cqe.extra1,
-						req->big_cqe.extra2);
-			memset(&req->big_cqe, 0, sizeof(req->big_cqe));
-		}
+		if (req->flags & (REQ_F_CQE_SKIP | REQ_F_REISSUE))
+			continue;
+		if (unlikely(!io_fill_cqe_req(ctx, req)))
+			io_cqe_overflow_fill(ctx, req);
 	}
 	__io_cq_unlock_post(ctx);
 

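Just to illustrate the "helper" remark above, the duplicated io_cqe
setup in io_post_aux_cqe() and io_add_aux_cqe() could collapse into
something like the below (purely a sketch, untested, and
io_alloc_aux_ocqe() is just a made-up name):

static struct io_overflow_cqe *io_alloc_aux_ocqe(struct io_ring_ctx *ctx,
						 u64 user_data, s32 res,
						 u32 cflags, gfp_t gfp)
{
	/* aux CQEs never carry a big_cqe payload, hence 0/0 for extra1/extra2 */
	struct io_cqe cqe = {
		.user_data	= user_data,
		.res		= res,
		.flags		= cflags
	};

	return io_get_ocqe(ctx, &cqe, 0, 0, gfp);
}

with io_post_aux_cqe() then boiling down to:

	ocqe = io_alloc_aux_ocqe(ctx, user_data, res, cflags, GFP_ATOMIC);
	filled = __io_cqring_event_overflow(ctx, ocqe);

and io_add_aux_cqe() doing the same with GFP_KERNEL and the
spin-locking io_cqring_event_overflow() variant.
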
-- 
Jens Axboe

Thread overview: 12+ messages
2025-05-14  8:07 [PATCH 0/4] overflow completion enhancements Pavel Begunkov
2025-05-14  8:07 ` [PATCH 1/4] io_uring: open code io_req_cqe_overflow() Pavel Begunkov
2025-05-14  8:07 ` [PATCH 2/4] io_uring: move locking inside overflow posting Pavel Begunkov
2025-05-14 16:42   ` Jens Axboe
2025-05-14 17:18     ` Pavel Begunkov
2025-05-14 19:25       ` Jens Axboe
2025-05-14 20:00         ` Pavel Begunkov
2025-05-14 20:05           ` Jens Axboe
2025-05-14 21:52             ` Jens Axboe [this message]
2025-05-15 11:04               ` Pavel Begunkov
2025-05-14  8:07 ` [PATCH 3/4] io_uring: alloc overflow entry before locking Pavel Begunkov
2025-05-14  8:07 ` [PATCH 4/4] io_uring: add lockdep warning for overflow posting Pavel Begunkov
