From: Caleb Sander Mateos <csander@purestorage.com>
To: Jens Axboe <axboe@kernel.dk>
Cc: io-uring@vger.kernel.org, asml.silence@gmail.com
Subject: Re: [PATCH 1/2] io_uring: split alloc and add of overflow
Date: Fri, 16 May 2025 09:31:51 -0700
Message-ID: <CADUfDZrp-Qq93g5uZn4_=amFhzF=j2Yk0MqJ5zqi_qYC4ZdhUQ@mail.gmail.com>
In-Reply-To: <20250516161452.395927-2-axboe@kernel.dk>
On Fri, May 16, 2025 at 9:15 AM Jens Axboe <axboe@kernel.dk> wrote:
>
> Add a new helper, io_alloc_ocqe(), that simply allocates and fills an
> overflow entry. The allocation can then be done outside the locked
> section, and hence use more appropriate gfp_t allocation flags rather
> than always defaulting to GFP_ATOMIC.
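
(For context: the resulting pattern in the sleepable caller, the
io_add_aux_cqe() hunk below, is roughly

        /* allocation may sleep, since it happens before taking the lock */
        ocqe = io_alloc_ocqe(ctx, user_data, res, cflags, 0, 0, GFP_KERNEL);
        /* only the list insertion needs ->completion_lock */
        spin_lock(&ctx->completion_lock);
        io_cqring_add_overflow(ctx, ocqe);
        spin_unlock(&ctx->completion_lock);

with io_cqring_add_overflow() handling a NULL ocqe by flagging CQ
overflow, so the kmalloc() no longer has to be GFP_ATOMIC just because
the list add happens under the lock.)
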
>
> Suggested-by: Pavel Begunkov <asml.silence@gmail.com>
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> ---
> io_uring/io_uring.c | 75 +++++++++++++++++++++++++++++----------------
> 1 file changed, 48 insertions(+), 27 deletions(-)
>
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index 9a9b8d35349b..2519fab303c4 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -718,20 +718,11 @@ static __cold void io_uring_drop_tctx_refs(struct task_struct *task)
> }
> }
>
> -static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
> - s32 res, u32 cflags, u64 extra1, u64 extra2)
> +static bool io_cqring_add_overflow(struct io_ring_ctx *ctx,
> + struct io_overflow_cqe *ocqe)
> {
> - struct io_overflow_cqe *ocqe;
> - size_t ocq_size = sizeof(struct io_overflow_cqe);
> - bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32);
> -
> lockdep_assert_held(&ctx->completion_lock);
>
> - if (is_cqe32)
> - ocq_size += sizeof(struct io_uring_cqe);
> -
> - ocqe = kmalloc(ocq_size, GFP_ATOMIC | __GFP_ACCOUNT);
> - trace_io_uring_cqe_overflow(ctx, user_data, res, cflags, ocqe);
> if (!ocqe) {
> struct io_rings *r = ctx->rings;
>
> @@ -749,22 +740,44 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
> atomic_or(IORING_SQ_CQ_OVERFLOW, &ctx->rings->sq_flags);
>
> }
> - ocqe->cqe.user_data = user_data;
> - ocqe->cqe.res = res;
> - ocqe->cqe.flags = cflags;
> - if (is_cqe32) {
> - ocqe->cqe.big_cqe[0] = extra1;
> - ocqe->cqe.big_cqe[1] = extra2;
> - }
> list_add_tail(&ocqe->list, &ctx->cq_overflow_list);
> return true;
> }
>
> -static void io_req_cqe_overflow(struct io_kiocb *req)
> +static struct io_overflow_cqe *io_alloc_ocqe(struct io_ring_ctx *ctx,
> + u64 user_data, s32 res, u32 cflags,
> + u64 extra1, u64 extra2, gfp_t gfp)
> +{
> + struct io_overflow_cqe *ocqe;
> + size_t ocq_size = sizeof(struct io_overflow_cqe);
> + bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32);
> +
> + if (is_cqe32)
> + ocq_size += sizeof(struct io_uring_cqe);
> +
> + ocqe = kmalloc(ocq_size, gfp | __GFP_ACCOUNT);
> + trace_io_uring_cqe_overflow(ctx, user_data, res, cflags, ocqe);
> + if (ocqe) {
> + ocqe->cqe.user_data = user_data;
> + ocqe->cqe.res = res;
> + ocqe->cqe.flags = cflags;
> + if (is_cqe32) {
> + ocqe->cqe.big_cqe[0] = extra1;
> + ocqe->cqe.big_cqe[1] = extra2;
> + }
> + }
> + return ocqe;
> +}
> +
> +static void io_req_cqe_overflow(struct io_kiocb *req, gfp_t gfp)
> {
> - io_cqring_event_overflow(req->ctx, req->cqe.user_data,
> - req->cqe.res, req->cqe.flags,
> - req->big_cqe.extra1, req->big_cqe.extra2);
> + struct io_ring_ctx *ctx = req->ctx;
> + struct io_overflow_cqe *ocqe;
> +
> + ocqe = io_alloc_ocqe(ctx, req->cqe.user_data, req->cqe.res,
> + req->cqe.flags, req->big_cqe.extra1,
> + req->big_cqe.extra2, gfp);
> + io_cqring_add_overflow(ctx, ocqe);
> memset(&req->big_cqe, 0, sizeof(req->big_cqe));
> }
>
> @@ -832,8 +845,12 @@ bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags
>
> io_cq_lock(ctx);
> filled = io_fill_cqe_aux(ctx, user_data, res, cflags);
> - if (!filled)
> - filled = io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
> + if (unlikely(!filled)) {
> + struct io_overflow_cqe *ocqe;
> +
> + ocqe = io_alloc_ocqe(ctx, user_data, res, cflags, 0, 0, GFP_ATOMIC);
> + filled = io_cqring_add_overflow(ctx, ocqe);
> + }
> io_cq_unlock_post(ctx);
> return filled;
> }
> @@ -848,8 +865,11 @@ void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
> lockdep_assert(ctx->lockless_cq);
>
> if (!io_fill_cqe_aux(ctx, user_data, res, cflags)) {
> + struct io_overflow_cqe *ocqe;
> +
> + ocqe = io_alloc_ocqe(ctx, user_data, res, cflags, 0, 0, GFP_KERNEL);
> spin_lock(&ctx->completion_lock);
> - io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
> + io_cqring_add_overflow(ctx, ocqe);
> spin_unlock(&ctx->completion_lock);
> }
> ctx->submit_state.cq_flush = true;
> @@ -1442,10 +1462,11 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
> unlikely(!io_fill_cqe_req(ctx, req))) {
> if (ctx->lockless_cq) {
> spin_lock(&ctx->completion_lock);
> - io_req_cqe_overflow(req);
> + io_req_cqe_overflow(req, GFP_ATOMIC);
> spin_unlock(&ctx->completion_lock);
> } else {
> - io_req_cqe_overflow(req);
> + gfp_t gfp = ctx->lockless_cq ? GFP_KERNEL : GFP_ATOMIC;
This is in the else branch of an if (ctx->lockless_cq), so isn't
ctx->lockless_cq known to be false here?
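
That is, the ternary can only ever evaluate to GFP_ATOMIC at this
point, so unless it is deliberately written to survive a later merge of
the two branches, this could simply be

        io_req_cqe_overflow(req, GFP_ATOMIC);

with the local gfp variable dropped.
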
Best,
Caleb
> + io_req_cqe_overflow(req, gfp);
> }
> }
> }
> --
> 2.49.0