* [PATCHSET v2 0/3] Allow non-atomic allocs for overflows
@ 2025-05-16 16:55 Jens Axboe
2025-05-16 16:55 ` [PATCH 1/3] io_uring: open code io_req_cqe_overflow() Jens Axboe
` (3 more replies)
0 siblings, 4 replies; 6+ messages in thread
From: Jens Axboe @ 2025-05-16 16:55 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, csander
Hi,
This is heavily inspired by the series that Pavel posted here:
https://lore.kernel.org/io-uring/cover.1747209332.git.asml.silence@gmail.com/
since I do think that potentially increasing the reliability of overflow
handling is a worthy endeavour. This version is just somewhat simpler, as it
doesn't really move anything around; it only splits allocating the overflow
entry from adding it to the io_ring_ctx.
Further cleanups could be done on top of this, obviously.
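As a rough illustration of where this ends up, here's a minimal userspace
sketch of the pattern (names and types are made up for the example, not the
kernel API): allocate the entry while no lock is held, so the allocator is
free to block, then take the lock only to link it in.

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct ocqe {
	uint64_t user_data;
	int32_t res;
	struct ocqe *next;
};

static pthread_mutex_t completion_lock = PTHREAD_MUTEX_INITIALIZER;
static struct ocqe *overflow_list;

/* Allocation happens with no lock held, so it may block freely. */
static struct ocqe *alloc_ocqe(uint64_t user_data, int32_t res)
{
	struct ocqe *o = malloc(sizeof(*o));

	if (o) {
		o->user_data = user_data;
		o->res = res;
	}
	return o;
}

/* Only the list manipulation runs under the lock. */
static void add_overflow(struct ocqe *o)
{
	pthread_mutex_lock(&completion_lock);
	o->next = overflow_list;
	overflow_list = o;
	pthread_mutex_unlock(&completion_lock);
}

int main(void)
{
	struct ocqe *o = alloc_ocqe(0x1234, -11);

	if (o)
		add_overflow(o);
	if (overflow_list)
		printf("queued overflow for user_data %#llx\n",
		       (unsigned long long)overflow_list->user_data);
	return 0;
}

In the kernel the same split becomes io_alloc_ocqe() plus
io_cqring_add_overflow(), which patch 2 introduces.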
io_uring/io_uring.c | 75 ++++++++++++++++++++++++++++-----------------
1 file changed, 47 insertions(+), 28 deletions(-)
Since v1:
- Include patch from Pavel open coding io_req_cqe_overflow()
- Fix silly thinko in __io_submit_flush_completions()
--
Jens Axboe
* [PATCH 1/3] io_uring: open code io_req_cqe_overflow()
2025-05-16 16:55 [PATCHSET v2 0/3] Allow non-atomic allocs for overflows Jens Axboe
@ 2025-05-16 16:55 ` Jens Axboe
2025-05-16 18:10 ` Caleb Sander Mateos
2025-05-16 16:55 ` [PATCH 2/3] io_uring: split alloc and add of overflow Jens Axboe
` (2 subsequent siblings)
3 siblings, 1 reply; 6+ messages in thread
From: Jens Axboe @ 2025-05-16 16:55 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, csander, Jens Axboe
From: Pavel Begunkov <asml.silence@gmail.com>
A preparation patch, just open code io_req_cqe_overflow().
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
io_uring/io_uring.c | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 9a9b8d35349b..068e140b6bd8 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -760,14 +760,6 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
return true;
}
-static void io_req_cqe_overflow(struct io_kiocb *req)
-{
- io_cqring_event_overflow(req->ctx, req->cqe.user_data,
- req->cqe.res, req->cqe.flags,
- req->big_cqe.extra1, req->big_cqe.extra2);
- memset(&req->big_cqe, 0, sizeof(req->big_cqe));
-}
-
/*
* writes to the cq entry need to come after reading head; the
* control dependency is enough as we're using WRITE_ONCE to
@@ -1442,11 +1434,19 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
unlikely(!io_fill_cqe_req(ctx, req))) {
if (ctx->lockless_cq) {
spin_lock(&ctx->completion_lock);
- io_req_cqe_overflow(req);
+ io_cqring_event_overflow(req->ctx, req->cqe.user_data,
+ req->cqe.res, req->cqe.flags,
+ req->big_cqe.extra1,
+ req->big_cqe.extra2);
spin_unlock(&ctx->completion_lock);
} else {
- io_req_cqe_overflow(req);
+ io_cqring_event_overflow(req->ctx, req->cqe.user_data,
+ req->cqe.res, req->cqe.flags,
+ req->big_cqe.extra1,
+ req->big_cqe.extra2);
}
+
+ memset(&req->big_cqe, 0, sizeof(req->big_cqe));
}
}
__io_cq_unlock_post(ctx);
--
2.49.0
* [PATCH 2/3] io_uring: split alloc and add of overflow
2025-05-16 16:55 [PATCHSET v2 0/3] Allow non-atomic allocs for overflows Jens Axboe
2025-05-16 16:55 ` [PATCH 1/3] io_uring: open code io_req_cqe_overflow() Jens Axboe
@ 2025-05-16 16:55 ` Jens Axboe
2025-05-16 16:55 ` [PATCH 3/3] io_uring: make io_alloc_ocqe() take a struct io_cqe pointer Jens Axboe
2025-05-16 18:50 ` [PATCH 4/3] io_uring: add __io_cqring_add_overflow() helper Jens Axboe
3 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2025-05-16 16:55 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, csander, Jens Axboe
Add a new helper, io_alloc_ocqe(), that simply allocates and fills an
overflow entry. The allocation can then be done outside of the locking
section, and hence use more appropriate gfp_t allocation flags rather than
always defaulting to GFP_ATOMIC.
Inspired by a previous series from Pavel:
https://lore.kernel.org/io-uring/cover.1747209332.git.asml.silence@gmail.com/
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
io_uring/io_uring.c | 75 +++++++++++++++++++++++++++------------------
1 file changed, 45 insertions(+), 30 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 068e140b6bd8..2ee002f878ba 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -718,20 +718,11 @@ static __cold void io_uring_drop_tctx_refs(struct task_struct *task)
}
}
-static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
- s32 res, u32 cflags, u64 extra1, u64 extra2)
+static bool io_cqring_add_overflow(struct io_ring_ctx *ctx,
+ struct io_overflow_cqe *ocqe)
{
- struct io_overflow_cqe *ocqe;
- size_t ocq_size = sizeof(struct io_overflow_cqe);
- bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32);
-
lockdep_assert_held(&ctx->completion_lock);
- if (is_cqe32)
- ocq_size += sizeof(struct io_uring_cqe);
-
- ocqe = kmalloc(ocq_size, GFP_ATOMIC | __GFP_ACCOUNT);
- trace_io_uring_cqe_overflow(ctx, user_data, res, cflags, ocqe);
if (!ocqe) {
struct io_rings *r = ctx->rings;
@@ -749,17 +740,35 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
atomic_or(IORING_SQ_CQ_OVERFLOW, &ctx->rings->sq_flags);
}
- ocqe->cqe.user_data = user_data;
- ocqe->cqe.res = res;
- ocqe->cqe.flags = cflags;
- if (is_cqe32) {
- ocqe->cqe.big_cqe[0] = extra1;
- ocqe->cqe.big_cqe[1] = extra2;
- }
list_add_tail(&ocqe->list, &ctx->cq_overflow_list);
return true;
}
+static struct io_overflow_cqe *io_alloc_ocqe(struct io_ring_ctx *ctx,
+ u64 user_data, s32 res, u32 cflags,
+ u64 extra1, u64 extra2, gfp_t gfp)
+{
+ struct io_overflow_cqe *ocqe;
+ size_t ocq_size = sizeof(struct io_overflow_cqe);
+ bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32);
+
+ if (is_cqe32)
+ ocq_size += sizeof(struct io_uring_cqe);
+
+ ocqe = kmalloc(ocq_size, gfp | __GFP_ACCOUNT);
+ trace_io_uring_cqe_overflow(ctx, user_data, res, cflags, ocqe);
+ if (ocqe) {
+ ocqe->cqe.user_data = user_data;
+ ocqe->cqe.res = res;
+ ocqe->cqe.flags = cflags;
+ if (is_cqe32) {
+ ocqe->cqe.big_cqe[0] = extra1;
+ ocqe->cqe.big_cqe[1] = extra2;
+ }
+ }
+ return ocqe;
+}
+
/*
* writes to the cq entry need to come after reading head; the
* control dependency is enough as we're using WRITE_ONCE to
@@ -824,8 +833,12 @@ bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags
io_cq_lock(ctx);
filled = io_fill_cqe_aux(ctx, user_data, res, cflags);
- if (!filled)
- filled = io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
+ if (unlikely(!filled)) {
+ struct io_overflow_cqe *ocqe;
+
+ ocqe = io_alloc_ocqe(ctx, user_data, res, cflags, 0, 0, GFP_ATOMIC);
+ filled = io_cqring_add_overflow(ctx, ocqe);
+ }
io_cq_unlock_post(ctx);
return filled;
}
@@ -840,8 +853,11 @@ void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
lockdep_assert(ctx->lockless_cq);
if (!io_fill_cqe_aux(ctx, user_data, res, cflags)) {
+ struct io_overflow_cqe *ocqe;
+
+ ocqe = io_alloc_ocqe(ctx, user_data, res, cflags, 0, 0, GFP_KERNEL);
spin_lock(&ctx->completion_lock);
- io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
+ io_cqring_add_overflow(ctx, ocqe);
spin_unlock(&ctx->completion_lock);
}
ctx->submit_state.cq_flush = true;
@@ -1432,20 +1448,19 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
*/
if (!(req->flags & (REQ_F_CQE_SKIP | REQ_F_REISSUE)) &&
unlikely(!io_fill_cqe_req(ctx, req))) {
+ gfp_t gfp = ctx->lockless_cq ? GFP_KERNEL : GFP_ATOMIC;
+ struct io_overflow_cqe *ocqe;
+
+ ocqe = io_alloc_ocqe(ctx, req->cqe.user_data, req->cqe.res,
+ req->cqe.flags, req->big_cqe.extra1,
+ req->big_cqe.extra2, gfp);
if (ctx->lockless_cq) {
spin_lock(&ctx->completion_lock);
- io_cqring_event_overflow(req->ctx, req->cqe.user_data,
- req->cqe.res, req->cqe.flags,
- req->big_cqe.extra1,
- req->big_cqe.extra2);
+ io_cqring_add_overflow(ctx, ocqe);
spin_unlock(&ctx->completion_lock);
} else {
- io_cqring_event_overflow(req->ctx, req->cqe.user_data,
- req->cqe.res, req->cqe.flags,
- req->big_cqe.extra1,
- req->big_cqe.extra2);
+ io_cqring_add_overflow(ctx, ocqe);
}
-
memset(&req->big_cqe, 0, sizeof(req->big_cqe));
}
}
--
2.49.0
* [PATCH 3/3] io_uring: make io_alloc_ocqe() take a struct io_cqe pointer
2025-05-16 16:55 [PATCHSET v2 0/3] Allow non-atomic allocs for overflows Jens Axboe
2025-05-16 16:55 ` [PATCH 1/3] io_uring: open code io_req_cqe_overflow() Jens Axboe
2025-05-16 16:55 ` [PATCH 2/3] io_uring: split alloc and add of overflow Jens Axboe
@ 2025-05-16 16:55 ` Jens Axboe
2025-05-16 18:50 ` [PATCH 4/3] io_uring: add __io_cqring_add_overflow() helper Jens Axboe
3 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2025-05-16 16:55 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, csander, Jens Axboe
The number of arguments to io_alloc_ocqe() is a bit unwieldy. Make it
take a struct io_cqe pointer rather than three separate CQE args. One
path already has that readily available; add an io_init_cqe() helper for
the remaining two.
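For reference, a standalone sketch of the io_init_cqe() idea: a macro that
expands to a C99 compound literal with designated initializers, so a
temporary struct io_cqe can be built in one expression. The struct below is
a simplified stand-in rather than the kernel definition. Note that the
designator names double as macro parameters and are substituted too, so the
callers here pass identically named variables, which keeps the expansion
intact.

#include <stdint.h>
#include <stdio.h>

struct io_cqe {
	uint64_t user_data;
	int32_t res;
	uint32_t flags;
};

#define io_init_cqe(user_data, res, cflags) \
	(struct io_cqe) { .user_data = user_data, .res = res, .flags = cflags }

int main(void)
{
	/* Mirrors the kernel callers: arguments spelled like the fields. */
	uint64_t user_data = 0xcafe;
	int32_t res = 0;
	uint32_t cflags = 0;
	struct io_cqe cqe = io_init_cqe(user_data, res, cflags);

	printf("user_data=%#llx res=%d flags=%u\n",
	       (unsigned long long)cqe.user_data, cqe.res, cqe.flags);
	return 0;
}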
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
io_uring/io_uring.c | 24 ++++++++++++++----------
1 file changed, 14 insertions(+), 10 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 2ee002f878ba..680374bceb52 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -745,8 +745,8 @@ static bool io_cqring_add_overflow(struct io_ring_ctx *ctx,
}
static struct io_overflow_cqe *io_alloc_ocqe(struct io_ring_ctx *ctx,
- u64 user_data, s32 res, u32 cflags,
- u64 extra1, u64 extra2, gfp_t gfp)
+ struct io_cqe *cqe, u64 extra1,
+ u64 extra2, gfp_t gfp)
{
struct io_overflow_cqe *ocqe;
size_t ocq_size = sizeof(struct io_overflow_cqe);
@@ -756,11 +756,11 @@ static struct io_overflow_cqe *io_alloc_ocqe(struct io_ring_ctx *ctx,
ocq_size += sizeof(struct io_uring_cqe);
ocqe = kmalloc(ocq_size, gfp | __GFP_ACCOUNT);
- trace_io_uring_cqe_overflow(ctx, user_data, res, cflags, ocqe);
+ trace_io_uring_cqe_overflow(ctx, cqe->user_data, cqe->res, cqe->flags, ocqe);
if (ocqe) {
- ocqe->cqe.user_data = user_data;
- ocqe->cqe.res = res;
- ocqe->cqe.flags = cflags;
+ ocqe->cqe.user_data = cqe->user_data;
+ ocqe->cqe.res = cqe->res;
+ ocqe->cqe.flags = cqe->flags;
if (is_cqe32) {
ocqe->cqe.big_cqe[0] = extra1;
ocqe->cqe.big_cqe[1] = extra2;
@@ -827,6 +827,9 @@ static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
return false;
}
+#define io_init_cqe(user_data, res, cflags) \
+ (struct io_cqe) { .user_data = user_data, .res = res, .flags = cflags }
+
bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
{
bool filled;
@@ -835,8 +838,9 @@ bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags
filled = io_fill_cqe_aux(ctx, user_data, res, cflags);
if (unlikely(!filled)) {
struct io_overflow_cqe *ocqe;
+ struct io_cqe cqe = io_init_cqe(user_data, res, cflags);
- ocqe = io_alloc_ocqe(ctx, user_data, res, cflags, 0, 0, GFP_ATOMIC);
+ ocqe = io_alloc_ocqe(ctx, &cqe, 0, 0, GFP_ATOMIC);
filled = io_cqring_add_overflow(ctx, ocqe);
}
io_cq_unlock_post(ctx);
@@ -854,8 +858,9 @@ void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
if (!io_fill_cqe_aux(ctx, user_data, res, cflags)) {
struct io_overflow_cqe *ocqe;
+ struct io_cqe cqe = io_init_cqe(user_data, res, cflags);
- ocqe = io_alloc_ocqe(ctx, user_data, res, cflags, 0, 0, GFP_KERNEL);
+ ocqe = io_alloc_ocqe(ctx, &cqe, 0, 0, GFP_KERNEL);
spin_lock(&ctx->completion_lock);
io_cqring_add_overflow(ctx, ocqe);
spin_unlock(&ctx->completion_lock);
@@ -1451,8 +1456,7 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
gfp_t gfp = ctx->lockless_cq ? GFP_KERNEL : GFP_ATOMIC;
struct io_overflow_cqe *ocqe;
- ocqe = io_alloc_ocqe(ctx, req->cqe.user_data, req->cqe.res,
- req->cqe.flags, req->big_cqe.extra1,
+ ocqe = io_alloc_ocqe(ctx, &req->cqe, req->big_cqe.extra1,
req->big_cqe.extra2, gfp);
if (ctx->lockless_cq) {
spin_lock(&ctx->completion_lock);
--
2.49.0
* Re: [PATCH 1/3] io_uring: open code io_req_cqe_overflow()
2025-05-16 16:55 ` [PATCH 1/3] io_uring: open code io_req_cqe_overflow() Jens Axboe
@ 2025-05-16 18:10 ` Caleb Sander Mateos
0 siblings, 0 replies; 6+ messages in thread
From: Caleb Sander Mateos @ 2025-05-16 18:10 UTC (permalink / raw)
To: Jens Axboe; +Cc: io-uring, asml.silence
On Fri, May 16, 2025 at 9:55 AM Jens Axboe <axboe@kernel.dk> wrote:
>
> From: Pavel Begunkov <asml.silence@gmail.com>
>
> A preparation patch, just open code io_req_cqe_overflow().
>
> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Caleb Sander Mateos <csander@purestorage.com>
* [PATCH 4/3] io_uring: add __io_cqring_add_overflow() helper
2025-05-16 16:55 [PATCHSET v2 0/3] Allow non-atomic allocs for overflows Jens Axboe
` (2 preceding siblings ...)
2025-05-16 16:55 ` [PATCH 3/3] io_uring: make io_alloc_ocqe() take a struct io_cqe pointer Jens Axboe
@ 2025-05-16 18:50 ` Jens Axboe
3 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2025-05-16 18:50 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, csander
This splits the overflow addition into the typical two helpers: one for
callers that already hold ->completion_lock (if it's needed), and one that
grabs it. Also mark both of them as __cold, as these should mostly be
out-of-line invocations, and do the same for the ocqe allocation helper.
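A quick userspace sketch of the two conventions in play (illustrative names
only, not the kernel API): the double-underscore helper assumes the caller
already holds the lock, the plain-named wrapper takes and drops it, and both
carry the cold attribute to hint that they sit off the hot path.

#include <pthread.h>
#include <stdlib.h>

struct entry {
	struct entry *next;
};

static pthread_mutex_t completion_lock = PTHREAD_MUTEX_INITIALIZER;
static struct entry *overflow_list;

/* Caller must already hold completion_lock. */
static __attribute__((cold)) void __add_overflow(struct entry *e)
{
	e->next = overflow_list;
	overflow_list = e;
}

/* Wrapper for callers that don't hold the lock yet. */
static __attribute__((cold)) void add_overflow(struct entry *e)
{
	pthread_mutex_lock(&completion_lock);
	__add_overflow(e);
	pthread_mutex_unlock(&completion_lock);
}

int main(void)
{
	struct entry *e = malloc(sizeof(*e));

	if (e)
		add_overflow(e);
	return 0;
}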
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index b50c2d434e74..d14a9b5eeb59 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -697,8 +697,8 @@ static __cold void io_uring_drop_tctx_refs(struct task_struct *task)
}
}
-static bool io_cqring_add_overflow(struct io_ring_ctx *ctx,
- struct io_overflow_cqe *ocqe)
+static __cold bool __io_cqring_add_overflow(struct io_ring_ctx *ctx,
+ struct io_overflow_cqe *ocqe)
{
lockdep_assert_held(&ctx->completion_lock);
@@ -723,9 +723,19 @@ static bool io_cqring_add_overflow(struct io_ring_ctx *ctx,
return true;
}
-static struct io_overflow_cqe *io_alloc_ocqe(struct io_ring_ctx *ctx,
- struct io_cqe *cqe, u64 extra1,
- u64 extra2, gfp_t gfp)
+
+static __cold void io_cqring_add_overflow(struct io_ring_ctx *ctx,
+ struct io_overflow_cqe *ocqe)
+{
+ spin_lock(&ctx->completion_lock);
+ __io_cqring_add_overflow(ctx, ocqe);
+ spin_unlock(&ctx->completion_lock);
+}
+
+static __cold struct io_overflow_cqe *io_alloc_ocqe(struct io_ring_ctx *ctx,
+ struct io_cqe *cqe,
+ u64 extra1, u64 extra2,
+ gfp_t gfp)
{
struct io_overflow_cqe *ocqe;
size_t ocq_size = sizeof(struct io_overflow_cqe);
@@ -820,7 +830,7 @@ bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags
struct io_cqe cqe = io_init_cqe(user_data, res, cflags);
ocqe = io_alloc_ocqe(ctx, &cqe, 0, 0, GFP_ATOMIC);
- filled = io_cqring_add_overflow(ctx, ocqe);
+ filled = __io_cqring_add_overflow(ctx, ocqe);
}
io_cq_unlock_post(ctx);
return filled;
@@ -840,9 +850,7 @@ void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
struct io_cqe cqe = io_init_cqe(user_data, res, cflags);
ocqe = io_alloc_ocqe(ctx, &cqe, 0, 0, GFP_KERNEL);
- spin_lock(&ctx->completion_lock);
io_cqring_add_overflow(ctx, ocqe);
- spin_unlock(&ctx->completion_lock);
}
ctx->submit_state.cq_flush = true;
}
@@ -1451,13 +1459,10 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
ocqe = io_alloc_ocqe(ctx, &req->cqe, req->big_cqe.extra1,
req->big_cqe.extra2, gfp);
- if (ctx->lockless_cq) {
- spin_lock(&ctx->completion_lock);
+ if (ctx->lockless_cq)
io_cqring_add_overflow(ctx, ocqe);
- spin_unlock(&ctx->completion_lock);
- } else {
- io_cqring_add_overflow(ctx, ocqe);
- }
+ else
+ __io_cqring_add_overflow(ctx, ocqe);
memset(&req->big_cqe, 0, sizeof(req->big_cqe));
}
}