From: Caleb Sander Mateos <[email protected]>
To: Keith Busch <[email protected]>
Cc: [email protected], [email protected], [email protected],
[email protected], [email protected],
[email protected], Keith Busch <[email protected]>
Subject: Re: [PATCHv4 3/5] ublk: zc register/unregister bvec
Date: Tue, 18 Feb 2025 18:36:28 -0800 [thread overview]
Message-ID: <CADUfDZq-LnAeP17GAdqGAPzCY77hrj+V+yEVi7G=_Uv4a3txaw@mail.gmail.com> (raw)
In-Reply-To: <[email protected]>
Sorry, I sent these comments on v3 to you directly and forgot to CC
the list. Copying them here.
On Tue, Feb 18, 2025 at 2:43 PM Keith Busch <[email protected]> wrote:
>
> From: Keith Busch <[email protected]>
>
> Provide new operations for the user to request mapping an active request
> to an io uring instance's buf_table. The user has to provide the index
> it wants to install the buffer.
>
> A reference count is taken on the request to ensure it can't be
> completed while it is active in a ring's buf_table.
>
> Signed-off-by: Keith Busch <[email protected]>
> ---
> drivers/block/ublk_drv.c | 137 +++++++++++++++++++++++++---------
> include/uapi/linux/ublk_cmd.h | 4 +
> 2 files changed, 105 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> index 529085181f355..0c753176b14e9 100644
> --- a/drivers/block/ublk_drv.c
> +++ b/drivers/block/ublk_drv.c
> @@ -51,6 +51,9 @@
> /* private ioctl command mirror */
> #define UBLK_CMD_DEL_DEV_ASYNC _IOC_NR(UBLK_U_CMD_DEL_DEV_ASYNC)
>
> +#define UBLK_IO_REGISTER_IO_BUF _IOC_NR(UBLK_U_IO_REGISTER_IO_BUF)
> +#define UBLK_IO_UNREGISTER_IO_BUF _IOC_NR(UBLK_U_IO_UNREGISTER_IO_BUF)
> +
> /* All UBLK_F_* have to be included into UBLK_F_ALL */
> #define UBLK_F_ALL (UBLK_F_SUPPORT_ZERO_COPY \
> | UBLK_F_URING_CMD_COMP_IN_TASK \
> @@ -201,7 +204,7 @@ static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq,
> int tag);
> static inline bool ublk_dev_is_user_copy(const struct ublk_device *ub)
> {
> - return ub->dev_info.flags & UBLK_F_USER_COPY;
> + return ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY);
> }
>
> static inline bool ublk_dev_is_zoned(const struct ublk_device *ub)
> @@ -581,7 +584,7 @@ static void ublk_apply_params(struct ublk_device *ub)
>
> static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
> {
> - return ubq->flags & UBLK_F_USER_COPY;
> + return ubq->flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY);
> }
>
> static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)
> @@ -1747,6 +1750,96 @@ static inline void ublk_prep_cancel(struct io_uring_cmd *cmd,
> io_uring_cmd_mark_cancelable(cmd, issue_flags);
> }
>
> +static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
> + struct ublk_queue *ubq, int tag, size_t offset)
> +{
> + struct request *req;
> +
> + if (!ublk_need_req_ref(ubq))
> + return NULL;
> +
> + req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag);
> + if (!req)
> + return NULL;
> +
> + if (!ublk_get_req_ref(ubq, req))
> + return NULL;
> +
> + if (unlikely(!blk_mq_request_started(req) || req->tag != tag))
> + goto fail_put;
> +
> + if (!ublk_rq_has_data(req))
> + goto fail_put;
> +
> + if (offset > blk_rq_bytes(req))
> + goto fail_put;
> +
> + return req;
> +fail_put:
> + ublk_put_req_ref(ubq, req);
> + return NULL;
> +}
> +
> +static void ublk_io_release(void *priv)
> +{
> + struct request *rq = priv;
> + struct ublk_queue *ubq = rq->mq_hctx->driver_data;
> +
> + ublk_put_req_ref(ubq, rq);
> +}
> +
> +static int ublk_register_io_buf(struct io_uring_cmd *cmd,
> + struct ublk_queue *ubq, int tag,
> + const struct ublksrv_io_cmd *ub_cmd,
> + unsigned int issue_flags)
> +{
> + struct io_ring_ctx *ctx = cmd_to_io_kiocb(cmd)->ctx;
> + struct ublk_device *ub = cmd->file->private_data;
> + int index = (int)ub_cmd->addr, ret;
Make index an unsigned to match io_buffer_register_bvec()? Same
comment for ublk_unregister_io_buf().
> + struct ublk_rq_data *data;
> + struct request *req;
> +
> + if (!ub)
> + return -EPERM;
__ublk_ch_uring_cmd() has already dereferenced ub =
cmd->file->private_data, how is it possible to hit this? Same comment
for ublk_unregister_io_buf()
> +
> + req = __ublk_check_and_get_req(ub, ubq, tag, 0);
Consider moving the offset > blk_rq_bytes(req) check from
__ublk_check_and_get_req() to ublk_check_and_get_req() so we don't
need to pass an unused offset here.
> + if (!req)
> + return -EINVAL;
> +
> + data = blk_mq_rq_to_pdu(req);
data appears unused in this function. Same comment for ublk_unregister_io_buf().
> + ret = io_buffer_register_bvec(ctx, req, ublk_io_release, index,
> + issue_flags);
> + if (ret) {
> + ublk_put_req_ref(ubq, req);
> + return ret;
> + }
> +
> + return 0;
> +}
> +
> +static int ublk_unregister_io_buf(struct io_uring_cmd *cmd,
> + struct ublk_queue *ubq, int tag,
> + const struct ublksrv_io_cmd *ub_cmd,
> + unsigned int issue_flags)
Make tag an unsigned to match __ublk_ch_uring_cmd() and
blk_mq_tag_to_rq()? Same comment for ublk_register_io_buf().
> +{
> + struct io_ring_ctx *ctx = cmd_to_io_kiocb(cmd)->ctx;
> + struct ublk_device *ub = cmd->file->private_data;
> + int index = (int)ub_cmd->addr;
> + struct ublk_rq_data *data;
> + struct request *req;
> +
> + if (!ub)
> + return -EPERM;
> +
> + req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag);
> + if (!req)
> + return -EINVAL;
> +
> + data = blk_mq_rq_to_pdu(req);
> + io_buffer_unregister_bvec(ctx, index, issue_flags);
Should we check that the registered bvec actually corresponds to this
ublk request? Otherwise, I don't see a reason for the unregister
command to involve the ublk request at all. Perhaps a generic io_uring
"unregister buffer index" operation similar to IORING_OP_FILES_UPDATE
would make more sense?
Best,
Caleb
> + return 0;
> +}
> +
> static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
> unsigned int issue_flags,
> const struct ublksrv_io_cmd *ub_cmd)
> @@ -1798,6 +1891,11 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
>
> ret = -EINVAL;
> switch (_IOC_NR(cmd_op)) {
> + case UBLK_IO_REGISTER_IO_BUF:
> + return ublk_register_io_buf(cmd, ubq, tag, ub_cmd, issue_flags);
> + case UBLK_IO_UNREGISTER_IO_BUF:
> + return ublk_unregister_io_buf(cmd, ubq, tag, ub_cmd,
> + issue_flags);
> case UBLK_IO_FETCH_REQ:
> /* UBLK_IO_FETCH_REQ is only allowed before queue is setup */
> if (ublk_queue_ready(ubq)) {
> @@ -1872,36 +1970,6 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
> return -EIOCBQUEUED;
> }
>
> -static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
> - struct ublk_queue *ubq, int tag, size_t offset)
> -{
> - struct request *req;
> -
> - if (!ublk_need_req_ref(ubq))
> - return NULL;
> -
> - req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag);
> - if (!req)
> - return NULL;
> -
> - if (!ublk_get_req_ref(ubq, req))
> - return NULL;
> -
> - if (unlikely(!blk_mq_request_started(req) || req->tag != tag))
> - goto fail_put;
> -
> - if (!ublk_rq_has_data(req))
> - goto fail_put;
> -
> - if (offset > blk_rq_bytes(req))
> - goto fail_put;
> -
> - return req;
> -fail_put:
> - ublk_put_req_ref(ubq, req);
> - return NULL;
> -}
> -
> static inline int ublk_ch_uring_cmd_local(struct io_uring_cmd *cmd,
> unsigned int issue_flags)
> {
> @@ -2527,9 +2595,6 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
> goto out_free_dev_number;
> }
>
> - /* We are not ready to support zero copy */
> - ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY;
> -
> ub->dev_info.nr_hw_queues = min_t(unsigned int,
> ub->dev_info.nr_hw_queues, nr_cpu_ids);
> ublk_align_max_io_size(ub);
> @@ -2860,7 +2925,7 @@ static int ublk_ctrl_get_features(struct io_uring_cmd *cmd)
> {
> const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
> void __user *argp = (void __user *)(unsigned long)header->addr;
> - u64 features = UBLK_F_ALL & ~UBLK_F_SUPPORT_ZERO_COPY;
> + u64 features = UBLK_F_ALL;
>
> if (header->len != UBLK_FEATURES_LEN || !header->addr)
> return -EINVAL;
> diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
> index a8bc98bb69fce..74246c926b55f 100644
> --- a/include/uapi/linux/ublk_cmd.h
> +++ b/include/uapi/linux/ublk_cmd.h
> @@ -94,6 +94,10 @@
> _IOWR('u', UBLK_IO_COMMIT_AND_FETCH_REQ, struct ublksrv_io_cmd)
> #define UBLK_U_IO_NEED_GET_DATA \
> _IOWR('u', UBLK_IO_NEED_GET_DATA, struct ublksrv_io_cmd)
> +#define UBLK_U_IO_REGISTER_IO_BUF \
> + _IOWR('u', 0x23, struct ublksrv_io_cmd)
> +#define UBLK_U_IO_UNREGISTER_IO_BUF \
> + _IOWR('u', 0x24, struct ublksrv_io_cmd)
>
> /* only ABORT means that no re-fetch */
> #define UBLK_IO_RES_OK 0
> --
> 2.43.5
>
next prev parent reply other threads:[~2025-02-19 2:36 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-02-18 22:42 [PATCHv4 0/5] ublk zero-copy support Keith Busch
2025-02-18 22:42 ` [PATCHv4 1/5] io_uring: move fixed buffer import to issue path Keith Busch
2025-02-19 1:27 ` Caleb Sander Mateos
2025-02-19 4:23 ` Ming Lei
2025-02-19 16:48 ` Pavel Begunkov
2025-02-19 17:15 ` Pavel Begunkov
2025-02-20 1:25 ` Keith Busch
2025-02-20 10:12 ` Pavel Begunkov
2025-02-18 22:42 ` [PATCHv4 2/5] io_uring: add support for kernel registered bvecs Keith Busch
2025-02-19 1:54 ` Caleb Sander Mateos
2025-02-19 17:23 ` Pavel Begunkov
2025-02-20 10:31 ` Pavel Begunkov
2025-02-20 10:38 ` Pavel Begunkov
2025-02-18 22:42 ` [PATCHv4 3/5] ublk: zc register/unregister bvec Keith Busch
2025-02-19 2:36 ` Caleb Sander Mateos [this message]
2025-02-20 11:11 ` Pavel Begunkov
2025-02-24 21:02 ` Keith Busch
2025-02-18 22:42 ` [PATCHv4 4/5] io_uring: add abstraction for buf_table rsrc data Keith Busch
2025-02-19 3:04 ` Caleb Sander Mateos
2025-02-18 22:42 ` [PATCHv4 5/5] io_uring: cache nodes and mapped buffers Keith Busch
2025-02-19 4:22 ` Caleb Sander Mateos
2025-02-24 21:01 ` Keith Busch
2025-02-24 21:39 ` Caleb Sander Mateos
2025-02-20 11:08 ` Pavel Begunkov
2025-02-20 15:24 ` Keith Busch
2025-02-20 16:06 ` Pavel Begunkov
2025-02-24 21:04 ` Keith Busch
2025-02-25 13:06 ` Pavel Begunkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CADUfDZq-LnAeP17GAdqGAPzCY77hrj+V+yEVi7G=_Uv4a3txaw@mail.gmail.com' \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox