* [PATCHSET 0/2] Allow non-atomic allocs for overflows @ 2025-05-16 16:08 Jens Axboe 2025-05-16 16:08 ` [PATCH 1/2] io_uring: split alloc and add of overflow Jens Axboe 2025-05-16 16:08 ` [PATCH 2/2] io_uring: make io_alloc_ocqe() take a struct io_cqe pointer Jens Axboe 0 siblings, 2 replies; 10+ messages in thread From: Jens Axboe @ 2025-05-16 16:08 UTC (permalink / raw) To: io-uring; +Cc: asml.silence Hi, This is heavily inspired by the series that Pavel posted here: https://lore.kernel.org/io-uring/cover.1747209332.git.asml.silence@gmail.com/ since I do think that potentially increasing the reliability of overflow handling is a worthy endeavour. It's just somewhat simpler as it doesn't move anything around really, it just does the split of allocating the overflow entry separately from adding it to the io_ring_ctx context. Further cleanups could be done on top of this, obviously. io_uring/io_uring.c | 79 +++++++++++++++++++++++++++++---------------- 1 file changed, 52 insertions(+), 27 deletions(-) -- Jens Axboe ^ permalink raw reply [flat|nested] 10+ messages in thread
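For orientation before the patches themselves: the whole series boils down to the reordering below, condensed from the io_add_aux_cqe() hunk in patch 1/2. Previously io_cqring_event_overflow() did a GFP_ATOMIC kmalloc() while completion_lock was held; with the split, the allocation moves in front of the lock and only the list insertion stays inside it.

    /* before: allocation and queueing both under the lock, GFP_ATOMIC only */
    spin_lock(&ctx->completion_lock);
    io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
    spin_unlock(&ctx->completion_lock);

    /* after: allocate first (GFP_KERNEL is fine when the caller may sleep),
     * then take the lock just long enough to queue the entry; the existing
     * !ocqe handling stays inside io_cqring_add_overflow() */
    ocqe = io_alloc_ocqe(ctx, user_data, res, cflags, 0, 0, GFP_KERNEL);
    spin_lock(&ctx->completion_lock);
    io_cqring_add_overflow(ctx, ocqe);
    spin_unlock(&ctx->completion_lock);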
* [PATCH 1/2] io_uring: split alloc and add of overflow 2025-05-16 16:08 [PATCHSET 0/2] Allow non-atomic allocs for overflows Jens Axboe @ 2025-05-16 16:08 ` Jens Axboe 2025-05-16 16:31 ` Caleb Sander Mateos 2025-05-16 16:43 ` Pavel Begunkov 2025-05-16 16:08 ` [PATCH 2/2] io_uring: make io_alloc_ocqe() take a struct io_cqe pointer Jens Axboe 1 sibling, 2 replies; 10+ messages in thread From: Jens Axboe @ 2025-05-16 16:08 UTC (permalink / raw) To: io-uring; +Cc: asml.silence, Jens Axboe Add a new helper, io_alloc_ocqe(), that simply allocates and fills an overflow entry. Then it can get done outside of the locking section, and hence use more appropriate gfp_t allocation flags rather than always default to GFP_ATOMIC. Suggested-by: Pavel Begunkov <asml.silence@gmail.com> Signed-off-by: Jens Axboe <axboe@kernel.dk> --- io_uring/io_uring.c | 75 +++++++++++++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 27 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 9a9b8d35349b..2519fab303c4 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -718,20 +718,11 @@ static __cold void io_uring_drop_tctx_refs(struct task_struct *task) } } -static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data, - s32 res, u32 cflags, u64 extra1, u64 extra2) +static bool io_cqring_add_overflow(struct io_ring_ctx *ctx, + struct io_overflow_cqe *ocqe) { - struct io_overflow_cqe *ocqe; - size_t ocq_size = sizeof(struct io_overflow_cqe); - bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32); - lockdep_assert_held(&ctx->completion_lock); - if (is_cqe32) - ocq_size += sizeof(struct io_uring_cqe); - - ocqe = kmalloc(ocq_size, GFP_ATOMIC | __GFP_ACCOUNT); - trace_io_uring_cqe_overflow(ctx, user_data, res, cflags, ocqe); if (!ocqe) { struct io_rings *r = ctx->rings; @@ -749,22 +740,44 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data, atomic_or(IORING_SQ_CQ_OVERFLOW, &ctx->rings->sq_flags); } - ocqe->cqe.user_data = user_data; - ocqe->cqe.res = res; - ocqe->cqe.flags = cflags; - if (is_cqe32) { - ocqe->cqe.big_cqe[0] = extra1; - ocqe->cqe.big_cqe[1] = extra2; - } list_add_tail(&ocqe->list, &ctx->cq_overflow_list); return true; } -static void io_req_cqe_overflow(struct io_kiocb *req) +static struct io_overflow_cqe *io_alloc_ocqe(struct io_ring_ctx *ctx, + u64 user_data, s32 res, u32 cflags, + u64 extra1, u64 extra2, gfp_t gfp) +{ + struct io_overflow_cqe *ocqe; + size_t ocq_size = sizeof(struct io_overflow_cqe); + bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32); + + if (is_cqe32) + ocq_size += sizeof(struct io_uring_cqe); + + ocqe = kmalloc(ocq_size, gfp | __GFP_ACCOUNT); + trace_io_uring_cqe_overflow(ctx, user_data, res, cflags, ocqe); + if (ocqe) { + ocqe->cqe.user_data = user_data; + ocqe->cqe.res = res; + ocqe->cqe.flags = cflags; + if (is_cqe32) { + ocqe->cqe.big_cqe[0] = extra1; + ocqe->cqe.big_cqe[1] = extra2; + } + } + return ocqe; +} + +static void io_req_cqe_overflow(struct io_kiocb *req, gfp_t gfp) { - io_cqring_event_overflow(req->ctx, req->cqe.user_data, - req->cqe.res, req->cqe.flags, - req->big_cqe.extra1, req->big_cqe.extra2); + struct io_ring_ctx *ctx = req->ctx; + struct io_overflow_cqe *ocqe; + + ocqe = io_alloc_ocqe(ctx, req->cqe.user_data, req->cqe.res, + req->cqe.flags, req->big_cqe.extra1, + req->big_cqe.extra2, gfp); + io_cqring_add_overflow(ctx, ocqe); memset(&req->big_cqe, 0, sizeof(req->big_cqe)); } @@ -832,8 +845,12 @@ bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags 
io_cq_lock(ctx); filled = io_fill_cqe_aux(ctx, user_data, res, cflags); - if (!filled) - filled = io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0); + if (unlikely(!filled)) { + struct io_overflow_cqe *ocqe; + + ocqe = io_alloc_ocqe(ctx, user_data, res, cflags, 0, 0, GFP_ATOMIC); + filled = io_cqring_add_overflow(ctx, ocqe); + } io_cq_unlock_post(ctx); return filled; } @@ -848,8 +865,11 @@ void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags) lockdep_assert(ctx->lockless_cq); if (!io_fill_cqe_aux(ctx, user_data, res, cflags)) { + struct io_overflow_cqe *ocqe; + + ocqe = io_alloc_ocqe(ctx, user_data, res, cflags, 0, 0, GFP_KERNEL); spin_lock(&ctx->completion_lock); - io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0); + io_cqring_add_overflow(ctx, ocqe); spin_unlock(&ctx->completion_lock); } ctx->submit_state.cq_flush = true; @@ -1442,10 +1462,11 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx) unlikely(!io_fill_cqe_req(ctx, req))) { if (ctx->lockless_cq) { spin_lock(&ctx->completion_lock); - io_req_cqe_overflow(req); + io_req_cqe_overflow(req, GFP_ATOMIC); spin_unlock(&ctx->completion_lock); } else { - io_req_cqe_overflow(req); + gfp_t gfp = ctx->lockless_cq ? GFP_KERNEL : GFP_ATOMIC; + io_req_cqe_overflow(req, gfp); } } } -- 2.49.0 ^ permalink raw reply related [flat|nested] 10+ messages in thread
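A note on the two flags the patch toggles between, since it assumes the background (standard kernel allocation semantics, not something spelled out in the thread): GFP_ATOMIC never sleeps, so it is the only legal choice while completion_lock (a spinlock) is held, but it draws on limited reserves and is the variant most likely to fail under memory pressure; GFP_KERNEL may block and reclaim, so it is far less likely to fail, but may only be used where sleeping is allowed. Which call site gets which flag then follows directly from where the allocation sits relative to the lock:

    /* io_post_aux_cqe(): io_cq_lock(ctx) is already held around the
     * fill/overflow sequence, so the allocation must not sleep */
    ocqe = io_alloc_ocqe(ctx, user_data, res, cflags, 0, 0, GFP_ATOMIC);

    /* io_add_aux_cqe(): the entry is allocated before completion_lock
     * is taken, so a sleeping allocation is fine */
    ocqe = io_alloc_ocqe(ctx, user_data, res, cflags, 0, 0, GFP_KERNEL);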
* Re: [PATCH 1/2] io_uring: split alloc and add of overflow 2025-05-16 16:08 ` [PATCH 1/2] io_uring: split alloc and add of overflow Jens Axboe @ 2025-05-16 16:31 ` Caleb Sander Mateos 2025-05-16 16:33 ` Jens Axboe 2025-05-16 16:43 ` Pavel Begunkov 1 sibling, 1 reply; 10+ messages in thread From: Caleb Sander Mateos @ 2025-05-16 16:31 UTC (permalink / raw) To: Jens Axboe; +Cc: io-uring, asml.silence On Fri, May 16, 2025 at 9:15 AM Jens Axboe <axboe@kernel.dk> wrote: > > Add a new helper, io_alloc_ocqe(), that simply allocates and fills an > overflow entry. Then it can get done outside of the locking section, > and hence use more appropriate gfp_t allocation flags rather than always > default to GFP_ATOMIC. > > Suggested-by: Pavel Begunkov <asml.silence@gmail.com> > Signed-off-by: Jens Axboe <axboe@kernel.dk> > --- > io_uring/io_uring.c | 75 +++++++++++++++++++++++++++++---------------- > 1 file changed, 48 insertions(+), 27 deletions(-) > > diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c > index 9a9b8d35349b..2519fab303c4 100644 > --- a/io_uring/io_uring.c > +++ b/io_uring/io_uring.c > @@ -718,20 +718,11 @@ static __cold void io_uring_drop_tctx_refs(struct task_struct *task) > } > } > > -static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data, > - s32 res, u32 cflags, u64 extra1, u64 extra2) > +static bool io_cqring_add_overflow(struct io_ring_ctx *ctx, > + struct io_overflow_cqe *ocqe) > { > - struct io_overflow_cqe *ocqe; > - size_t ocq_size = sizeof(struct io_overflow_cqe); > - bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32); > - > lockdep_assert_held(&ctx->completion_lock); > > - if (is_cqe32) > - ocq_size += sizeof(struct io_uring_cqe); > - > - ocqe = kmalloc(ocq_size, GFP_ATOMIC | __GFP_ACCOUNT); > - trace_io_uring_cqe_overflow(ctx, user_data, res, cflags, ocqe); > if (!ocqe) { > struct io_rings *r = ctx->rings; > > @@ -749,22 +740,44 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data, > atomic_or(IORING_SQ_CQ_OVERFLOW, &ctx->rings->sq_flags); > > } > - ocqe->cqe.user_data = user_data; > - ocqe->cqe.res = res; > - ocqe->cqe.flags = cflags; > - if (is_cqe32) { > - ocqe->cqe.big_cqe[0] = extra1; > - ocqe->cqe.big_cqe[1] = extra2; > - } > list_add_tail(&ocqe->list, &ctx->cq_overflow_list); > return true; > } > > -static void io_req_cqe_overflow(struct io_kiocb *req) > +static struct io_overflow_cqe *io_alloc_ocqe(struct io_ring_ctx *ctx, > + u64 user_data, s32 res, u32 cflags, > + u64 extra1, u64 extra2, gfp_t gfp) > +{ > + struct io_overflow_cqe *ocqe; > + size_t ocq_size = sizeof(struct io_overflow_cqe); > + bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32); > + > + if (is_cqe32) > + ocq_size += sizeof(struct io_uring_cqe); > + > + ocqe = kmalloc(ocq_size, gfp | __GFP_ACCOUNT); > + trace_io_uring_cqe_overflow(ctx, user_data, res, cflags, ocqe); > + if (ocqe) { > + ocqe->cqe.user_data = user_data; > + ocqe->cqe.res = res; > + ocqe->cqe.flags = cflags; > + if (is_cqe32) { > + ocqe->cqe.big_cqe[0] = extra1; > + ocqe->cqe.big_cqe[1] = extra2; > + } > + } > + return ocqe; > +} > + > +static void io_req_cqe_overflow(struct io_kiocb *req, gfp_t gfp) > { > - io_cqring_event_overflow(req->ctx, req->cqe.user_data, > - req->cqe.res, req->cqe.flags, > - req->big_cqe.extra1, req->big_cqe.extra2); > + struct io_ring_ctx *ctx = req->ctx; > + struct io_overflow_cqe *ocqe; > + > + ocqe = io_alloc_ocqe(ctx, req->cqe.user_data, req->cqe.res, > + req->cqe.flags, req->big_cqe.extra1, > + req->big_cqe.extra2, gfp); > + io_cqring_add_overflow(ctx, 
ocqe); > memset(&req->big_cqe, 0, sizeof(req->big_cqe)); > } > > @@ -832,8 +845,12 @@ bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags > > io_cq_lock(ctx); > filled = io_fill_cqe_aux(ctx, user_data, res, cflags); > - if (!filled) > - filled = io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0); > + if (unlikely(!filled)) { > + struct io_overflow_cqe *ocqe; > + > + ocqe = io_alloc_ocqe(ctx, user_data, res, cflags, 0, 0, GFP_ATOMIC); > + filled = io_cqring_add_overflow(ctx, ocqe); > + } > io_cq_unlock_post(ctx); > return filled; > } > @@ -848,8 +865,11 @@ void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags) > lockdep_assert(ctx->lockless_cq); > > if (!io_fill_cqe_aux(ctx, user_data, res, cflags)) { > + struct io_overflow_cqe *ocqe; > + > + ocqe = io_alloc_ocqe(ctx, user_data, res, cflags, 0, 0, GFP_KERNEL); > spin_lock(&ctx->completion_lock); > - io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0); > + io_cqring_add_overflow(ctx, ocqe); > spin_unlock(&ctx->completion_lock); > } > ctx->submit_state.cq_flush = true; > @@ -1442,10 +1462,11 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx) > unlikely(!io_fill_cqe_req(ctx, req))) { > if (ctx->lockless_cq) { > spin_lock(&ctx->completion_lock); > - io_req_cqe_overflow(req); > + io_req_cqe_overflow(req, GFP_ATOMIC); > spin_unlock(&ctx->completion_lock); > } else { > - io_req_cqe_overflow(req); > + gfp_t gfp = ctx->lockless_cq ? GFP_KERNEL : GFP_ATOMIC; This is in the else case of an if (ctx->lockless_cq). Isn't ctx->lockless_cq known to be false? Best, Caleb > + io_req_cqe_overflow(req, gfp); > } > } > } > -- > 2.49.0 > > ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 1/2] io_uring: split alloc and add of overflow 2025-05-16 16:31 ` Caleb Sander Mateos @ 2025-05-16 16:33 ` Jens Axboe 0 siblings, 0 replies; 10+ messages in thread From: Jens Axboe @ 2025-05-16 16:33 UTC (permalink / raw) To: Caleb Sander Mateos; +Cc: io-uring, asml.silence On 5/16/25 10:31 AM, Caleb Sander Mateos wrote: >> @@ -1442,10 +1462,11 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx) >> unlikely(!io_fill_cqe_req(ctx, req))) { >> if (ctx->lockless_cq) { >> spin_lock(&ctx->completion_lock); >> - io_req_cqe_overflow(req); >> + io_req_cqe_overflow(req, GFP_ATOMIC); >> spin_unlock(&ctx->completion_lock); >> } else { >> - io_req_cqe_overflow(req); >> + gfp_t gfp = ctx->lockless_cq ? GFP_KERNEL : GFP_ATOMIC; > > This is in the else case of an if (ctx->lockless_cq). Isn't > ctx->lockless_cq known to be false? Indeed! Actually this part needs to be split too. I'll redo this one. -- Jens Axboe ^ permalink raw reply [flat|nested] 10+ messages in thread
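For readers following along: in the quoted hunk the else branch only runs when ctx->lockless_cq is false, so the ternary can never pick GFP_KERNEL there, and in the lockless_cq branch the allocation currently happens after spin_lock(), where a sleeping GFP_KERNEL allocation would not be allowed anyway. One possible shape of the split Jens refers to (an illustrative sketch only, not the code from the respun series) is to hoist the allocation out of the locked region in the lockless case:

    if (unlikely(!io_fill_cqe_req(ctx, req))) {
        if (ctx->lockless_cq) {
            struct io_overflow_cqe *ocqe;

            /* no CQ lock held yet on this path, so sleeping is fine */
            ocqe = io_alloc_ocqe(ctx, req->cqe.user_data, req->cqe.res,
                                 req->cqe.flags, req->big_cqe.extra1,
                                 req->big_cqe.extra2, GFP_KERNEL);
            spin_lock(&ctx->completion_lock);
            io_cqring_add_overflow(ctx, ocqe);
            spin_unlock(&ctx->completion_lock);
        } else {
            /* completion_lock is already held on this path, hence GFP_ATOMIC */
            io_req_cqe_overflow(req, GFP_ATOMIC);
        }
    }

(The clearing of req->big_cqe that io_req_cqe_overflow() performs is left out of the lockless branch of this sketch.)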
* Re: [PATCH 1/2] io_uring: split alloc and add of overflow 2025-05-16 16:08 ` [PATCH 1/2] io_uring: split alloc and add of overflow Jens Axboe 2025-05-16 16:31 ` Caleb Sander Mateos @ 2025-05-16 16:43 ` Pavel Begunkov 2025-05-16 16:44 ` Jens Axboe 1 sibling, 1 reply; 10+ messages in thread From: Pavel Begunkov @ 2025-05-16 16:43 UTC (permalink / raw) To: Jens Axboe, io-uring On 5/16/25 17:08, Jens Axboe wrote: > Add a new helper, io_alloc_ocqe(), that simply allocates and fills an > overflow entry. Then it can get done outside of the locking section, > and hence use more appropriate gfp_t allocation flags rather than always > default to GFP_ATOMIC. > > Suggested-by: Pavel Begunkov <asml.silence@gmail.com> I didn't suggest that. If anything, it complicates CQE posting helpers when we should be moving in the opposite direction. > Signed-off-by: Jens Axboe <axboe@kernel.dk> > --- > io_uring/io_uring.c | 75 +++++++++++++++++++++++++++++---------------- > 1 file changed, 48 insertions(+), 27 deletions(-) > > diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c > index 9a9b8d35349b..2519fab303c4 100644 > --- a/io_uring/io_uring.c > +++ b/io_uring/io_uring.c > @@ -718,20 +718,11 @@ static __cold void io_uring_drop_tctx_refs(struct task_struct *task) ... > ctx->submit_state.cq_flush = true; > @@ -1442,10 +1462,11 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx) > unlikely(!io_fill_cqe_req(ctx, req))) { > if (ctx->lockless_cq) { > spin_lock(&ctx->completion_lock); > - io_req_cqe_overflow(req); > + io_req_cqe_overflow(req, GFP_ATOMIC); > spin_unlock(&ctx->completion_lock); > } else { > - io_req_cqe_overflow(req); > + gfp_t gfp = ctx->lockless_cq ? GFP_KERNEL : GFP_ATOMIC; if (!ctx->lockless_cq) gfp_t gfp = ctx->lockless_cq ? GFP_KERNEL : GFP_ATOMIC; -- Pavel Begunkov ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 1/2] io_uring: split alloc and add of overflow 2025-05-16 16:43 ` Pavel Begunkov @ 2025-05-16 16:44 ` Jens Axboe 2025-05-16 16:58 ` Pavel Begunkov 0 siblings, 1 reply; 10+ messages in thread From: Jens Axboe @ 2025-05-16 16:44 UTC (permalink / raw) To: Pavel Begunkov, io-uring On 5/16/25 10:43 AM, Pavel Begunkov wrote: > On 5/16/25 17:08, Jens Axboe wrote: >> Add a new helper, io_alloc_ocqe(), that simply allocates and fills an >> overflow entry. Then it can get done outside of the locking section, >> and hence use more appropriate gfp_t allocation flags rather than always >> default to GFP_ATOMIC. >> >> Suggested-by: Pavel Begunkov <asml.silence@gmail.com> > > I didn't suggest that. If anything, it complicates CQE posting > helpers when we should be moving in the opposite direction. I'll kill the attribution then - it's not meant to mean the approach, but the concept of being able to use GFP_KERNEL when we can. >> @@ -1442,10 +1462,11 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx) >> unlikely(!io_fill_cqe_req(ctx, req))) { >> if (ctx->lockless_cq) { >> spin_lock(&ctx->completion_lock); >> - io_req_cqe_overflow(req); >> + io_req_cqe_overflow(req, GFP_ATOMIC); >> spin_unlock(&ctx->completion_lock); >> } else { >> - io_req_cqe_overflow(req); >> + gfp_t gfp = ctx->lockless_cq ? GFP_KERNEL : GFP_ATOMIC; > > if (!ctx->lockless_cq) > gfp_t gfp = ctx->lockless_cq ? GFP_KERNEL : GFP_ATOMIC; > Yeah see other reply to Caleb. I'll just slurp in your patch 1/4 as this makes it simpler. -- Jens Axboe ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 1/2] io_uring: split alloc and add of overflow 2025-05-16 16:44 ` Jens Axboe @ 2025-05-16 16:58 ` Pavel Begunkov 2025-05-16 16:57 ` Jens Axboe 0 siblings, 1 reply; 10+ messages in thread From: Pavel Begunkov @ 2025-05-16 16:58 UTC (permalink / raw) To: Jens Axboe, io-uring On 5/16/25 17:44, Jens Axboe wrote: > On 5/16/25 10:43 AM, Pavel Begunkov wrote: >> On 5/16/25 17:08, Jens Axboe wrote: >>> Add a new helper, io_alloc_ocqe(), that simply allocates and fills an >>> overflow entry. Then it can get done outside of the locking section, >>> and hence use more appropriate gfp_t allocation flags rather than always >>> default to GFP_ATOMIC. >>> >>> Suggested-by: Pavel Begunkov <asml.silence@gmail.com> >> >> I didn't suggest that. If anything, it complicates CQE posting >> helpers when we should be moving in the opposite direction. > > I'll kill the attribution then - it's not meant to mean the > approach, but the concept of being able to use GFP_KERNEL > when we can. Sure, but that will be blurred by time, while the patch IMHO is making it worse and should never see the light. -- Pavel Begunkov ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 1/2] io_uring: split alloc and add of overflow 2025-05-16 16:58 ` Pavel Begunkov @ 2025-05-16 16:57 ` Jens Axboe 2025-05-16 18:27 ` Pavel Begunkov 0 siblings, 1 reply; 10+ messages in thread From: Jens Axboe @ 2025-05-16 16:57 UTC (permalink / raw) To: Pavel Begunkov, io-uring On 5/16/25 10:58 AM, Pavel Begunkov wrote: > On 5/16/25 17:44, Jens Axboe wrote: >> On 5/16/25 10:43 AM, Pavel Begunkov wrote: >>> On 5/16/25 17:08, Jens Axboe wrote: >>>> Add a new helper, io_alloc_ocqe(), that simply allocates and fills an >>>> overflow entry. Then it can get done outside of the locking section, >>>> and hence use more appropriate gfp_t allocation flags rather than always >>>> default to GFP_ATOMIC. >>>> >>>> Suggested-by: Pavel Begunkov <asml.silence@gmail.com> >>> >>> I didn't suggest that. If anything, it complicates CQE posting >>> helpers when we should be moving in the opposite direction. >> >> I'll kill the attribution then - it's not meant to mean the >> approach, but the concept of being able to use GFP_KERNEL >> when we can. > > Sure, but that will be blurred by time, while the patch IMHO is > making it worse and should never see the light. Well, you're certainly cheerful today. -- Jens Axboe ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 1/2] io_uring: split alloc and add of overflow 2025-05-16 16:57 ` Jens Axboe @ 2025-05-16 18:27 ` Pavel Begunkov 0 siblings, 0 replies; 10+ messages in thread From: Pavel Begunkov @ 2025-05-16 18:27 UTC (permalink / raw) To: Jens Axboe, io-uring On 5/16/25 17:57, Jens Axboe wrote: > On 5/16/25 10:58 AM, Pavel Begunkov wrote: >> On 5/16/25 17:44, Jens Axboe wrote: >>> On 5/16/25 10:43 AM, Pavel Begunkov wrote: >>>> On 5/16/25 17:08, Jens Axboe wrote: >>>>> Add a new helper, io_alloc_ocqe(), that simply allocates and fills an >>>>> overflow entry. Then it can get done outside of the locking section, >>>>> and hence use more appropriate gfp_t allocation flags rather than always >>>>> default to GFP_ATOMIC. >>>>> >>>>> Suggested-by: Pavel Begunkov <asml.silence@gmail.com> >>>> >>>> I didn't suggest that. If anything, it complicates CQE posting >>>> helpers when we should be moving in the opposite direction. >>> >>> I'll kill the attribution then - it's not meant to mean the >>> approach, but the concept of being able to use GFP_KERNEL >>> when we can. >> >> Sure, but that will be blurred by time, while the patch IMHO is >> making it worse and should never see the light. > > Well, you're certainly cheerful today. Only extending an opinion, which, it seems, you're going to shrug off. That's fine, I just need to learn my lessons and stop caring about improving the code base. -- Pavel Begunkov ^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH 2/2] io_uring: make io_alloc_ocqe() take a struct io_cqe pointer 2025-05-16 16:08 [PATCHSET 0/2] Allow non-atomic allocs for overflows Jens Axboe 2025-05-16 16:08 ` [PATCH 1/2] io_uring: split alloc and add of overflow Jens Axboe @ 2025-05-16 16:08 ` Jens Axboe 1 sibling, 0 replies; 10+ messages in thread From: Jens Axboe @ 2025-05-16 16:08 UTC (permalink / raw) To: io-uring; +Cc: asml.silence, Jens Axboe The number of arguments to io_alloc_ocqe() is a bit unwieldy. Make it take a struct io_cqe pointer rather than three separate CQE args. One path already has that readily available, add an io_init_cqe() helper for the remaining two. Signed-off-by: Jens Axboe <axboe@kernel.dk> --- io_uring/io_uring.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 2519fab303c4..e3c8c19902e8 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -745,8 +745,8 @@ static bool io_cqring_add_overflow(struct io_ring_ctx *ctx, } static struct io_overflow_cqe *io_alloc_ocqe(struct io_ring_ctx *ctx, - u64 user_data, s32 res, u32 cflags, - u64 extra1, u64 extra2, gfp_t gfp) + struct io_cqe *cqe, u64 extra1, + u64 extra2, gfp_t gfp) { struct io_overflow_cqe *ocqe; size_t ocq_size = sizeof(struct io_overflow_cqe); @@ -756,11 +756,11 @@ static struct io_overflow_cqe *io_alloc_ocqe(struct io_ring_ctx *ctx, ocq_size += sizeof(struct io_uring_cqe); ocqe = kmalloc(ocq_size, gfp | __GFP_ACCOUNT); - trace_io_uring_cqe_overflow(ctx, user_data, res, cflags, ocqe); + trace_io_uring_cqe_overflow(ctx, cqe->user_data, cqe->res, cqe->flags, ocqe); if (ocqe) { - ocqe->cqe.user_data = user_data; - ocqe->cqe.res = res; - ocqe->cqe.flags = cflags; + ocqe->cqe.user_data = cqe->user_data; + ocqe->cqe.res = cqe->res; + ocqe->cqe.flags = cqe->flags; if (is_cqe32) { ocqe->cqe.big_cqe[0] = extra1; ocqe->cqe.big_cqe[1] = extra2; } @@ -774,8 +774,7 @@ static void io_req_cqe_overflow(struct io_kiocb *req, gfp_t gfp) struct io_ring_ctx *ctx = req->ctx; struct io_overflow_cqe *ocqe; - ocqe = io_alloc_ocqe(ctx, req->cqe.user_data, req->cqe.res, - req->cqe.flags, req->big_cqe.extra1, + ocqe = io_alloc_ocqe(ctx, &req->cqe, req->big_cqe.extra1, req->big_cqe.extra2, gfp); io_cqring_add_overflow(ctx, ocqe); memset(&req->big_cqe, 0, sizeof(req->big_cqe)); @@ -839,6 +838,9 @@ static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, return false; } +#define io_init_cqe(user_data, res, cflags) \ + (struct io_cqe) { .user_data = user_data, .res = res, .flags = cflags } + bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags) { bool filled; @@ -847,8 +849,9 @@ bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags filled = io_fill_cqe_aux(ctx, user_data, res, cflags); if (unlikely(!filled)) { struct io_overflow_cqe *ocqe; + struct io_cqe cqe = io_init_cqe(user_data, res, cflags); - ocqe = io_alloc_ocqe(ctx, user_data, res, cflags, 0, 0, GFP_ATOMIC); + ocqe = io_alloc_ocqe(ctx, &cqe, 0, 0, GFP_ATOMIC); filled = io_cqring_add_overflow(ctx, ocqe); } io_cq_unlock_post(ctx); @@ -866,8 +869,9 @@ void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags) if (!io_fill_cqe_aux(ctx, user_data, res, cflags)) { struct io_overflow_cqe *ocqe; + struct io_cqe cqe = io_init_cqe(user_data, res, cflags); - ocqe = io_alloc_ocqe(ctx, user_data, res, cflags, 0, 0, GFP_KERNEL); + ocqe = io_alloc_ocqe(ctx, &cqe, 0, 0, GFP_KERNEL); spin_lock(&ctx->completion_lock);
io_cqring_add_overflow(ctx, ocqe); spin_unlock(&ctx->completion_lock); -- 2.49.0 ^ permalink raw reply related [flat|nested] 10+ messages in thread
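One small C detail worth noting in the new helper: io_init_cqe() is a plain macro whose designated-initializer field names reuse the macro parameter names, so the expansion only stays well-formed when the arguments are variables literally named user_data and res, which is exactly what both call sites above pass. A standalone model of the pattern (a stand-in struct and made-up values, not the kernel's definitions) compiles and runs as ordinary C99:

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for the kernel's struct io_cqe, for illustration only. */
    struct io_cqe {
        uint64_t user_data;
        int32_t  res;
        uint32_t flags;
    };

    /* Same shape as the helper added in patch 2/2: a C99 compound literal
     * that builds a struct io_cqe value on the caller's stack. */
    #define io_init_cqe(user_data, res, cflags) \
        (struct io_cqe) { .user_data = user_data, .res = res, .flags = cflags }

    int main(void)
    {
        /* mirror the kernel call sites: pass plain variables named
         * user_data/res/cflags so the designators expand cleanly */
        uint64_t user_data = 0x1234;
        int32_t res = -11;      /* e.g. -EAGAIN */
        uint32_t cflags = 0;
        struct io_cqe cqe = io_init_cqe(user_data, res, cflags);

        printf("user_data=0x%llx res=%d flags=%u\n",
               (unsigned long long)cqe.user_data, cqe.res, cqe.flags);
        return 0;
    }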