From: Caleb Sander Mateos <[email protected]>
To: Keith Busch <[email protected]>
Cc: [email protected], [email protected], [email protected],
[email protected], [email protected],
[email protected], Keith Busch <[email protected]>
Subject: Re: [PATCHv4 1/5] io_uring: move fixed buffer import to issue path
Date: Tue, 18 Feb 2025 17:27:59 -0800 [thread overview]
Message-ID: <CADUfDZrN5LqmoSb1+e2DHejQM_xewOaVxXmU99LsXxc=erCy3A@mail.gmail.com> (raw)
In-Reply-To: <[email protected]>
On Tue, Feb 18, 2025 at 2:43 PM Keith Busch <[email protected]> wrote:
>
> From: Keith Busch <[email protected]>
>
> Similar to the fixed file path, requests may depend on a previous one
> to set up an index, so we need to allow linking them. The prep callback
> happens too soon for linked commands, so the lookup needs to be deferred
> to the issue path. Change the prep callbacks to just set the buf_index
> and let generic io_uring code handle the fixed buffer node setup, just
> like it already does for fixed files.
>
> Signed-off-by: Keith Busch <[email protected]>
> ---
> include/linux/io_uring_types.h | 3 +++
> io_uring/io_uring.c | 19 ++++++++++++++
> io_uring/net.c | 25 ++++++-------------
> io_uring/nop.c | 22 +++--------------
> io_uring/rw.c | 45 ++++++++++++++++++++++++----------
> io_uring/uring_cmd.c | 16 ++----------
> 6 files changed, 67 insertions(+), 63 deletions(-)
>
> diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
> index c0fe8a00fe53a..0bcaefc4ffe02 100644
> --- a/include/linux/io_uring_types.h
> +++ b/include/linux/io_uring_types.h
> @@ -482,6 +482,7 @@ enum {
> REQ_F_DOUBLE_POLL_BIT,
> REQ_F_APOLL_MULTISHOT_BIT,
> REQ_F_CLEAR_POLLIN_BIT,
> + REQ_F_FIXED_BUFFER_BIT,
Move this to the end of the REQ_F_*_BIT definitions (before __REQ_F_LAST_BIT)?
> /* keep async read/write and isreg together and in order */
> REQ_F_SUPPORT_NOWAIT_BIT,
> REQ_F_ISREG_BIT,
> @@ -574,6 +575,8 @@ enum {
> REQ_F_BUF_NODE = IO_REQ_FLAG(REQ_F_BUF_NODE_BIT),
> /* request has read/write metadata assigned */
> REQ_F_HAS_METADATA = IO_REQ_FLAG(REQ_F_HAS_METADATA_BIT),
> + /* request has a fixed buffer at buf_index */
> + REQ_F_FIXED_BUFFER = IO_REQ_FLAG(REQ_F_FIXED_BUFFER_BIT),
> };
>
> typedef void (*io_req_tw_func_t)(struct io_kiocb *req, io_tw_token_t tw);
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index 58528bf61638e..7800edbc57279 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -1721,6 +1721,23 @@ static bool io_assign_file(struct io_kiocb *req, const struct io_issue_def *def,
> return !!req->file;
> }
>
> +static bool io_assign_buffer(struct io_kiocb *req, unsigned int issue_flags)
> +{
> + struct io_ring_ctx *ctx = req->ctx;
> + struct io_rsrc_node *node;
> +
> + if (req->buf_node || !(req->flags & REQ_F_FIXED_BUFFER))
> + return true;
> +
> + io_ring_submit_lock(ctx, issue_flags);
> + node = io_rsrc_node_lookup(&ctx->buf_table, req->buf_index);
> + if (node)
> + io_req_assign_buf_node(req, node);
> + io_ring_submit_unlock(ctx, issue_flags);
> +
> + return !!node;
> +}
> +
> static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
> {
> const struct io_issue_def *def = &io_issue_defs[req->opcode];
> @@ -1729,6 +1746,8 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
>
> if (unlikely(!io_assign_file(req, def, issue_flags)))
> return -EBADF;
> + if (unlikely(!io_assign_buffer(req, issue_flags)))
> + return -EFAULT;
>
> if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred()))
> creds = override_creds(req->creds);
> diff --git a/io_uring/net.c b/io_uring/net.c
> index 000dc70d08d0d..39838e8575b53 100644
> --- a/io_uring/net.c
> +++ b/io_uring/net.c
> @@ -1373,6 +1373,10 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> #endif
> if (unlikely(!io_msg_alloc_async(req)))
> return -ENOMEM;
> + if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
> + req->buf_index = zc->buf_index;
Can the buf_index field of io_sr_msg be removed now that it's only
used within io_send_zc_prep()?
> + req->flags |= REQ_F_FIXED_BUFFER;
> + }
> if (req->opcode != IORING_OP_SENDMSG_ZC)
> return io_send_setup(req, sqe);
> return io_sendmsg_setup(req, sqe);
> @@ -1434,25 +1438,10 @@ static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags)
> struct io_async_msghdr *kmsg = req->async_data;
> int ret;
>
> - if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
> - struct io_ring_ctx *ctx = req->ctx;
> - struct io_rsrc_node *node;
> -
> - ret = -EFAULT;
> - io_ring_submit_lock(ctx, issue_flags);
> - node = io_rsrc_node_lookup(&ctx->buf_table, sr->buf_index);
> - if (node) {
> - io_req_assign_buf_node(sr->notif, node);
> - ret = 0;
> - }
> - io_ring_submit_unlock(ctx, issue_flags);
> -
> - if (unlikely(ret))
> - return ret;
> -
> + if (req->flags & REQ_F_FIXED_BUFFER) {
> ret = io_import_fixed(ITER_SOURCE, &kmsg->msg.msg_iter,
> - node->buf, (u64)(uintptr_t)sr->buf,
> - sr->len);
> + req->buf_node->buf,
> + (u64)(uintptr_t)sr->buf, sr->len);
> if (unlikely(ret))
> return ret;
> kmsg->msg.sg_from_iter = io_sg_from_iter;
> diff --git a/io_uring/nop.c b/io_uring/nop.c
> index 5e5196df650a1..989908603112f 100644
> --- a/io_uring/nop.c
> +++ b/io_uring/nop.c
> @@ -16,7 +16,6 @@ struct io_nop {
> struct file *file;
> int result;
> int fd;
> - int buffer;
> unsigned int flags;
> };
>
> @@ -39,10 +38,10 @@ int io_nop_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> nop->fd = READ_ONCE(sqe->fd);
> else
> nop->fd = -1;
> - if (nop->flags & IORING_NOP_FIXED_BUFFER)
> - nop->buffer = READ_ONCE(sqe->buf_index);
> - else
> - nop->buffer = -1;
> + if (nop->flags & IORING_NOP_FIXED_BUFFER) {
> + req->buf_index = READ_ONCE(sqe->buf_index);
> + req->flags |= REQ_F_FIXED_BUFFER;
> + }
> return 0;
> }
>
> @@ -63,19 +62,6 @@ int io_nop(struct io_kiocb *req, unsigned int issue_flags)
> goto done;
> }
> }
> - if (nop->flags & IORING_NOP_FIXED_BUFFER) {
> - struct io_ring_ctx *ctx = req->ctx;
> - struct io_rsrc_node *node;
> -
> - ret = -EFAULT;
> - io_ring_submit_lock(ctx, issue_flags);
> - node = io_rsrc_node_lookup(&ctx->buf_table, nop->buffer);
> - if (node) {
> - io_req_assign_buf_node(req, node);
> - ret = 0;
> - }
> - io_ring_submit_unlock(ctx, issue_flags);
> - }
> done:
> if (ret < 0)
> req_set_fail(req);
> diff --git a/io_uring/rw.c b/io_uring/rw.c
> index 16f12f94943f7..2d8910d9197a0 100644
> --- a/io_uring/rw.c
> +++ b/io_uring/rw.c
> @@ -353,25 +353,14 @@ int io_prep_writev(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> static int io_prep_rw_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe,
> int ddir)
> {
> - struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
> - struct io_ring_ctx *ctx = req->ctx;
> - struct io_rsrc_node *node;
> - struct io_async_rw *io;
> int ret;
>
> ret = io_prep_rw(req, sqe, ddir, false);
> if (unlikely(ret))
> return ret;
>
> - node = io_rsrc_node_lookup(&ctx->buf_table, req->buf_index);
> - if (!node)
> - return -EFAULT;
> - io_req_assign_buf_node(req, node);
> -
> - io = req->async_data;
> - ret = io_import_fixed(ddir, &io->iter, node->buf, rw->addr, rw->len);
> - iov_iter_save_state(&io->iter, &io->iter_state);
> - return ret;
> + req->flags |= REQ_F_FIXED_BUFFER;
> + return 0;
> }
>
> int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> @@ -954,10 +943,36 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags)
> return ret;
> }
>
> +static int io_import_fixed_buffer(struct io_kiocb *req, int ddir)
> +{
> + struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
> + struct io_async_rw *io;
> + int ret;
> +
> + if (!(req->flags & REQ_F_FIXED_BUFFER))
> + return 0;
> +
> + io = req->async_data;
> + if (io->bytes_done)
> + return 0;
> +
> + ret = io_import_fixed(ddir, &io->iter, req->buf_node->buf, rw->addr,
> + rw->len);
> + if (ret)
> + return ret;
> +
> + iov_iter_save_state(&io->iter, &io->iter_state);
> + return 0;
> +}
> +
> int io_read(struct io_kiocb *req, unsigned int issue_flags)
> {
> int ret;
>
> + ret = io_import_fixed_buffer(req, READ);
> + if (unlikely(ret))
> + return ret;
> +
> ret = __io_read(req, issue_flags);
> if (ret >= 0)
> return kiocb_done(req, ret, issue_flags);
> @@ -1062,6 +1077,10 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
> ssize_t ret, ret2;
> loff_t *ppos;
>
> + ret = io_import_fixed_buffer(req, WRITE);
> + if (unlikely(ret))
> + return ret;
> +
> ret = io_rw_init_file(req, FMODE_WRITE, WRITE);
> if (unlikely(ret))
> return ret;
> diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
> index 8bdf2c9b3fef9..112b49fde23e5 100644
> --- a/io_uring/uring_cmd.c
> +++ b/io_uring/uring_cmd.c
> @@ -200,19 +200,8 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> return -EINVAL;
>
> if (ioucmd->flags & IORING_URING_CMD_FIXED) {
> - struct io_ring_ctx *ctx = req->ctx;
> - struct io_rsrc_node *node;
> - u16 index = READ_ONCE(sqe->buf_index);
> -
> - node = io_rsrc_node_lookup(&ctx->buf_table, index);
> - if (unlikely(!node))
> - return -EFAULT;
> - /*
> - * Pi node upfront, prior to io_uring_cmd_import_fixed()
> - * being called. This prevents destruction of the mapped buffer
> - * we'll need at actual import time.
> - */
> - io_req_assign_buf_node(req, node);
> + req->buf_index = READ_ONCE(sqe->buf_index);
> + req->flags |= REQ_F_FIXED_BUFFER;
> }
> ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
>
> @@ -262,7 +251,6 @@ int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
> struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
> struct io_rsrc_node *node = req->buf_node;
>
> - /* Must have had rsrc_node assigned at prep time */
> if (node)
> return io_import_fixed(rw, iter, node->buf, ubuf, len);
Is it possible for node to be NULL? If the buffer lookup failed,
io_issue_sqe() would have returned early and not called ->issue(), so
this function wouldn't have been called.
Best,
Caleb
>
> --
> 2.43.5
>
next prev parent reply other threads:[~2025-02-19 1:28 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-02-18 22:42 [PATCHv4 0/5] ublk zero-copy support Keith Busch
2025-02-18 22:42 ` [PATCHv4 1/5] io_uring: move fixed buffer import to issue path Keith Busch
2025-02-19 1:27 ` Caleb Sander Mateos [this message]
2025-02-19 4:23 ` Ming Lei
2025-02-19 16:48 ` Pavel Begunkov
2025-02-19 17:15 ` Pavel Begunkov
2025-02-20 1:25 ` Keith Busch
2025-02-20 10:12 ` Pavel Begunkov
2025-02-18 22:42 ` [PATCHv4 2/5] io_uring: add support for kernel registered bvecs Keith Busch
2025-02-19 1:54 ` Caleb Sander Mateos
2025-02-19 17:23 ` Pavel Begunkov
2025-02-20 10:31 ` Pavel Begunkov
2025-02-20 10:38 ` Pavel Begunkov
2025-02-18 22:42 ` [PATCHv4 3/5] ublk: zc register/unregister bvec Keith Busch
2025-02-19 2:36 ` Caleb Sander Mateos
2025-02-20 11:11 ` Pavel Begunkov
2025-02-24 21:02 ` Keith Busch
2025-02-18 22:42 ` [PATCHv4 4/5] io_uring: add abstraction for buf_table rsrc data Keith Busch
2025-02-19 3:04 ` Caleb Sander Mateos
2025-02-18 22:42 ` [PATCHv4 5/5] io_uring: cache nodes and mapped buffers Keith Busch
2025-02-19 4:22 ` Caleb Sander Mateos
2025-02-24 21:01 ` Keith Busch
2025-02-24 21:39 ` Caleb Sander Mateos
2025-02-20 11:08 ` Pavel Begunkov
2025-02-20 15:24 ` Keith Busch
2025-02-20 16:06 ` Pavel Begunkov
2025-02-24 21:04 ` Keith Busch
2025-02-25 13:06 ` Pavel Begunkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CADUfDZrN5LqmoSb1+e2DHejQM_xewOaVxXmU99LsXxc=erCy3A@mail.gmail.com' \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox