From: Jens Axboe <axboe@kernel.dk>
To: Pavel Begunkov <asml.silence@gmail.com>, io-uring@vger.kernel.org
Subject: Re: [PATCH 2/4] io_uring: move locking inside overflow posting
Date: Wed, 14 May 2025 15:52:21 -0600
Message-ID: <a645a6a2-d722-4ef1-bdfd-3701ab315584@kernel.dk>
In-Reply-To: <1644225f-36c9-4abf-8da3-cc853cdab0e8@kernel.dk>
Since sometimes it's easier to talk in code than in English, something
like the below (just applied on top, and utterly untested) I think is
much cleaner. Didn't spend a lot of time on it; I'm sure it could get
condensed down some more with a helper or something. But it keeps the
locking in the caller, while still allowing GFP_KERNEL alloc for
lockless_cq.
Somewhat unrelated, but it also fills in an io_cqe and passes that in
for the non-cqe32 parts, just for readability's sake.
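
To spell out the resulting calling pattern: with the helpers in the diff
below, a caller that hits an overflow ends up doing roughly the following
(sketch only, reusing the helper names from the diff, not meant to build
standalone):

static void overflow_example(struct io_ring_ctx *ctx, u64 user_data,
			     s32 res, u32 cflags)
{
	/* caller picks the gfp mask based on whether the CQ lock is held */
	gfp_t gfp = ctx->lockless_cq ? GFP_KERNEL : GFP_ATOMIC;
	struct io_cqe cqe = {
		.user_data	= user_data,
		.res		= res,
		.flags		= cflags,
	};
	struct io_overflow_cqe *ocqe;

	/* allocate and fill the overflow entry, locking stays with the caller */
	ocqe = io_get_ocqe(ctx, &cqe, 0, 0, gfp);
	if (ctx->lockless_cq)
		io_cqring_event_overflow(ctx, ocqe);	/* grabs completion_lock */
	else
		__io_cqring_event_overflow(ctx, ocqe);	/* lock already held */
}

which is essentially what io_cqe_overflow_fill() below turns into for the
flush completions path.
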
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index da1075b66a87..9b6d09633a29 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -744,44 +744,12 @@ static bool __io_cqring_event_overflow(struct io_ring_ctx *ctx,
return true;
}
-static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, bool locked,
- u64 user_data, s32 res, u32 cflags,
- u64 extra1, u64 extra2)
+static void io_cqring_event_overflow(struct io_ring_ctx *ctx,
+ struct io_overflow_cqe *ocqe)
{
- struct io_overflow_cqe *ocqe;
- size_t ocq_size = sizeof(struct io_overflow_cqe);
- bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32);
- gfp_t gfp = GFP_KERNEL_ACCOUNT;
- bool queued;
-
- io_lockdep_assert_cq_locked(ctx);
-
- if (is_cqe32)
- ocq_size += sizeof(struct io_uring_cqe);
- if (locked)
- gfp = GFP_ATOMIC | __GFP_ACCOUNT;
-
- ocqe = kmalloc(ocq_size, gfp);
- trace_io_uring_cqe_overflow(ctx, user_data, res, cflags, ocqe);
-
- if (ocqe) {
- ocqe->cqe.user_data = user_data;
- ocqe->cqe.res = res;
- ocqe->cqe.flags = cflags;
- if (is_cqe32) {
- ocqe->cqe.big_cqe[0] = extra1;
- ocqe->cqe.big_cqe[1] = extra2;
- }
- }
-
- if (locked) {
- queued = __io_cqring_event_overflow(ctx, ocqe);
- } else {
- spin_lock(&ctx->completion_lock);
- queued = __io_cqring_event_overflow(ctx, ocqe);
- spin_unlock(&ctx->completion_lock);
- }
- return queued;
+ spin_lock(&ctx->completion_lock);
+ __io_cqring_event_overflow(ctx, ocqe);
+ spin_unlock(&ctx->completion_lock);
}
/*
@@ -842,15 +810,49 @@ static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
return false;
}
+static struct io_overflow_cqe *io_get_ocqe(struct io_ring_ctx *ctx,
+ struct io_cqe *cqe, u64 extra1,
+ u64 extra2, gfp_t gfp)
+{
+ size_t ocq_size = sizeof(struct io_overflow_cqe);
+ bool is_cqe32 = ctx->flags & IORING_SETUP_CQE32;
+ struct io_overflow_cqe *ocqe;
+
+ if (is_cqe32)
+ ocq_size += sizeof(struct io_uring_cqe);
+
+ ocqe = kmalloc(ocq_size, gfp);
+ trace_io_uring_cqe_overflow(ctx, cqe->user_data, cqe->res, cqe->flags, ocqe);
+ if (ocqe) {
+ ocqe->cqe.user_data = cqe->user_data;
+ ocqe->cqe.res = cqe->res;
+ ocqe->cqe.flags = cqe->flags;
+ if (is_cqe32) {
+ ocqe->cqe.big_cqe[0] = extra1;
+ ocqe->cqe.big_cqe[1] = extra2;
+ }
+ }
+
+ return ocqe;
+}
+
bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
{
bool filled;
io_cq_lock(ctx);
filled = io_fill_cqe_aux(ctx, user_data, res, cflags);
- if (!filled)
- filled = io_cqring_event_overflow(ctx, true,
- user_data, res, cflags, 0, 0);
+ if (unlikely(!filled)) {
+ struct io_cqe cqe = {
+ .user_data = user_data,
+ .res = res,
+ .flags = cflags
+ };
+ struct io_overflow_cqe *ocqe;
+
+ ocqe = io_get_ocqe(ctx, &cqe, 0, 0, GFP_ATOMIC);
+ filled = __io_cqring_event_overflow(ctx, ocqe);
+ }
io_cq_unlock_post(ctx);
return filled;
}
@@ -864,8 +866,17 @@ void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
lockdep_assert_held(&ctx->uring_lock);
lockdep_assert(ctx->lockless_cq);
- if (!io_fill_cqe_aux(ctx, user_data, res, cflags))
- io_cqring_event_overflow(ctx, false, user_data, res, cflags, 0, 0);
+ if (!io_fill_cqe_aux(ctx, user_data, res, cflags)) {
+ struct io_cqe cqe = {
+ .user_data = user_data,
+ .res = res,
+ .flags = cflags
+ };
+ struct io_overflow_cqe *ocqe;
+
+ ocqe = io_get_ocqe(ctx, &cqe, 0, 0, GFP_KERNEL);
+ io_cqring_event_overflow(ctx, ocqe);
+ }
ctx->submit_state.cq_flush = true;
}
@@ -1437,6 +1448,20 @@ static void io_free_batch_list(struct io_ring_ctx *ctx,
} while (node);
}
+static __cold void io_cqe_overflow_fill(struct io_ring_ctx *ctx,
+ struct io_kiocb *req)
+{
+ gfp_t gfp = ctx->lockless_cq ? GFP_KERNEL : GFP_ATOMIC;
+ struct io_overflow_cqe *ocqe;
+
+ ocqe = io_get_ocqe(ctx, &req->cqe, req->big_cqe.extra1, req->big_cqe.extra2, gfp);
+ if (ctx->lockless_cq)
+ io_cqring_event_overflow(ctx, ocqe);
+ else
+ __io_cqring_event_overflow(ctx, ocqe);
+ memset(&req->big_cqe, 0, sizeof(req->big_cqe));
+}
+
void __io_submit_flush_completions(struct io_ring_ctx *ctx)
__must_hold(&ctx->uring_lock)
{
@@ -1453,17 +1478,10 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
* will go through the io-wq retry machinery and post one
* later.
*/
- if (!(req->flags & (REQ_F_CQE_SKIP | REQ_F_REISSUE)) &&
- unlikely(!io_fill_cqe_req(ctx, req))) {
- bool locked = !ctx->lockless_cq;
-
- io_cqring_event_overflow(req->ctx, locked,
- req->cqe.user_data,
- req->cqe.res, req->cqe.flags,
- req->big_cqe.extra1,
- req->big_cqe.extra2);
- memset(&req->big_cqe, 0, sizeof(req->big_cqe));
- }
+ if (req->flags & (REQ_F_CQE_SKIP | REQ_F_REISSUE))
+ continue;
+ if (unlikely(!io_fill_cqe_req(ctx, req)))
+ io_cqe_overflow_fill(ctx, req);
}
__io_cq_unlock_post(ctx);
--
Jens Axboe